From 16597684916caa133ffbfe94514d96e791eaacc1 Mon Sep 17 00:00:00 2001 From: Michael Deistler Date: Fri, 3 Nov 2023 09:59:06 +0100 Subject: [PATCH] Deployed bfe2d2c with MkDocs version: 1.5.3 --- 404.html | 34 +-- citation/index.html | 34 +-- code_of_conduct/index.html | 34 +-- contribute/index.html | 34 +-- credits/index.html | 34 +-- examples/00_HH_simulator/index.html | 34 +-- examples/01_decision_making_model/index.html | 34 +-- faq/index.html | 34 +-- faq/question_01/index.html | 34 +-- faq/question_02/index.html | 34 +-- faq/question_03/index.html | 34 +-- faq/question_04/index.html | 34 +-- faq/question_05/index.html | 34 +-- faq/question_06/index.html | 34 +-- faq/question_07/index.html | 34 +-- index.html | 43 ++-- install/index.html | 34 +-- reference/index.html | 34 +-- search/search_index.json | 2 +- sitemap.xml | 72 +++--- sitemap.xml.gz | Bin 648 -> 648 bytes tutorial/00_getting_started/index.html | 151 ++---------- tutorial/01_gaussian_amortized/index.html | 82 +++---- tutorial/02_flexible_interface/index.html | 46 ++-- tutorial/03_multiround_inference/index.html | 38 +-- tutorial/04_density_estimators/index.html | 38 +-- tutorial/05_embedding_net/index.html | 38 +-- .../07_conditional_distributions/index.html | 34 +-- tutorial/08_restriction_estimator/index.html | 54 ++--- tutorial/09_sensitivity_analysis/index.html | 34 +-- .../10_crafting_summary_statistics/index.html | 54 ++--- tutorial/11_sampler_interface/index.html | 219 +++++++----------- .../index.html | 38 +-- .../index.html | 34 +-- .../index.html | 121 +++++----- .../index.html | 34 +-- .../15_mcmc_diagnostics_with_arviz/index.html | 34 +-- tutorial/16_implemented_methods/index.html | 38 +-- .../index.html | 34 +-- tutorial/17_vi_posteriors/index.html | 134 ++--------- 40 files changed, 600 insertions(+), 1350 deletions(-) diff --git a/404.html b/404.html index a5afcb9c4..9a38a1ec9 100644 --- a/404.html +++ b/404.html @@ -302,20 +302,6 @@ -
  • - - Amortized inference - -
  • - - - - - - - - -
  • Flexible interface @@ -331,8 +317,8 @@
  • - - Sampler interface + + Amortized inference
  • @@ -406,8 +392,8 @@
  • - - Using Variational Inference for Building Posteriors + + Sampling algorithms in sbi
  • @@ -448,8 +434,8 @@
  • - - Handling invalid simulations + + SBI with trial-based data
  • @@ -462,8 +448,8 @@
  • - - Crafting summary statistics + + Handling invalid simulations
  • @@ -476,8 +462,8 @@
  • - - SBI with trial-based data + + Crafting summary statistics
  • diff --git a/citation/index.html b/citation/index.html index 2ce834eff..2a454de6c 100644 --- a/citation/index.html +++ b/citation/index.html @@ -309,20 +309,6 @@ -
diff --git a/index.html b/index.html index 0a17728a4..871f116fc 100644 --- a/index.html +++ b/index.html @@ -1000,10 +986,15 @@

    sbi: simulation-based inference

    sbi: A Python toolbox for simulation-based inference.

    using sbi

    -

    Inference can be run in a single line of code:

    +

    Inference can be run in a single line of code

    posterior = infer(simulator, prior, method='SNPE', num_simulations=1000)
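A rough end-to-end sketch of how this one-liner is typically used (the Gaussian-noise toy simulator, the uniform prior, and the observation x_o below are illustrative assumptions, not part of this page):

import torch
from sbi.inference import infer
from sbi.utils import BoxUniform

# Illustrative 3-dimensional uniform prior (assumed for this sketch).
prior = BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))

# Toy simulator (assumed): observations are the parameters plus Gaussian noise.
def simulator(theta):
    return theta + 0.1 * torch.randn_like(theta)

posterior = infer(simulator, prior, method="SNPE", num_simulations=1000)

# Draw posterior samples for a hypothetical observation x_o.
x_o = torch.zeros(3)
samples = posterior.sample((1000,), x=x_o)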
     
    -

    and you can choose from a variety of amortized and sequential SBI methods.

    +

    or in a few lines for more flexibility:

    +
    inference = SNPE(prior=prior)
    +_ = inference.append_simulations(theta, x).train()
    +posterior = inference.build_posterior()
    +
    +
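Once built this way, the posterior is used exactly like the one returned by infer. A short usage sketch, assuming theta and x were simulated beforehand (e.g. with simulate_for_sbi) and x_o is an observation from your data:

# Sample and evaluate the posterior at the observation x_o (assumed given).
samples = posterior.sample((10000,), x=x_o)
log_probability = posterior.log_prob(samples, x=x_o)

# Optional: visualize the posterior samples.
from sbi.analysis import pairplot
pairplot(samples)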

    sbi lets you choose from a variety of amortized and sequential SBI methods:

Amortized methods return a posterior that can be applied to many different observations without retraining, whereas sequential methods focus the inference on one particular observation to be more simulation-efficient. For an overview of implemented methods, see below or check out our GitHub page.
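To make this distinction concrete, the sketch below reuses the simulator, prior, and posterior assumed above (with two hypothetical observations x_o_1 and x_o_2) and contrasts amortized reuse with the multi-round pattern from the sbi tutorials:

# Amortized: one trained posterior, many observations, no retraining.
samples_1 = posterior.sample((1000,), x=x_o_1)
samples_2 = posterior.sample((1000,), x=x_o_2)

# Sequential (multi-round SNPE): focus simulations on a single observation x_o.
from sbi.inference import SNPE, prepare_for_sbi, simulate_for_sbi

simulator, prior = prepare_for_sbi(simulator, prior)
inference = SNPE(prior=prior)
proposal = prior
for _ in range(2):
    theta, x = simulate_for_sbi(simulator, proposal, num_simulations=500)
    density_estimator = inference.append_simulations(theta, x, proposal=proposal).train()
    posterior = inference.build_posterior(density_estimator)
    proposal = posterior.set_default_x(x_o)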

    diff --git a/install/index.html b/install/index.html index 07a4b9903..d779b7f25 100644 --- a/install/index.html +++ b/install/index.html @@ -319,20 +319,6 @@ -
diff --git a/reference/index.html b/reference/index.html index d368e03aa..bf5e20113 100644 --- a/reference/index.html +++ b/reference/index.html @@ -309,20 +309,6 @@
  • diff --git a/search/search_index.json b/search/search_index.json index 0cb0952cb..16afa2e45 100644 --- a/search/search_index.json +++ b/search/search_index.json @@ -1 +1 @@ -{"config":{"indexing":"full","lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"sbi : simulation-based inference \u00b6 sbi : A Python toolbox for simulation-based inference. Inference can be run in a single line of code: posterior = infer ( simulator , prior , method = 'SNPE' , num_simulations = 1000 ) and you can choose from a variety of amortized and sequential SBI methods. Amortized methods return a posterior that can be applied to many different observations without retraining, whereas sequential methods focus the inference on one particular observation to be more simulation-efficient. For an overview of implemented methods see below, or checkout or GitHub page . Overview \u00b6 To learn about the general motivation behind simulation-based inference, and the inference methods included in sbi , read on below. For example applications to canonical problems in neuroscience, browse the recent research article Training deep neural density estimators to identify mechanistic models of neural dynamics . If you want to get started using sbi on your own problem, jump to installation and then check out the tutorial . Motivation and approach \u00b6 Many areas of science and engineering make extensive use of complex, stochastic, numerical simulations to describe the structure and dynamics of the processes being investigated. A key challenge in simulation-based science is constraining these simulation models\u2019 parameters, which are intepretable quantities, with observational data. Bayesian inference provides a general and powerful framework to invert the simulators, i.e. describe the parameters which are consistent both with empirical data and prior knowledge. In the case of simulators, a key quantity required for statistical inference, the likelihood of observed data given parameters, \\(\\mathcal{L}(\\theta) = p(x_o|\\theta)\\) , is typically intractable, rendering conventional statistical approaches inapplicable. sbi implements powerful machine-learning methods that address this problem. Roughly, these algorithms can be categorized as: Neural Posterior Estimation (amortized NPE and sequential SNPE ), Neural Likelihood Estimation ( (S)NLE ), and Neural Ratio Estimation ( (S)NRE ). Depending on the characteristics of the problem, e.g. the dimensionalities of the parameter space and the observation space, one of the methods will be more suitable. Goal: Algorithmically identify mechanistic models which are consistent with data. Each of the methods above needs three inputs: A candidate mechanistic model, prior knowledge or constraints on model parameters, and observational data (or summary statistics thereof). The methods then proceed by sampling parameters from the prior followed by simulating synthetic data from these parameters, learning the (probabilistic) association between data (or data features) and underlying parameters, i.e. to learn statistical inference from simulated data. The way in which this association is learned differs between the above methods, but all use deep neural networks. This learned neural network is then applied to empirical data to derive the full space of parameters consistent with the data and the prior, i.e. the posterior distribution. 
High posterior probability is assigned to parameters which are consistent with both the data and the prior, low probability to inconsistent parameters. While SNPE directly learns the posterior distribution, SNLE and SNRE need an extra MCMC sampling step to construct a posterior. If needed, an initial estimate of the posterior can be used to adaptively generate additional informative simulations. Publications \u00b6 See Cranmer, Brehmer, Louppe (2020) for a recent review on simulation-based inference. The following papers offer additional details on the inference methods implemented in sbi . You can find a tutorial on how to run each of these methods here . Posterior estimation ( (S)NPE ) \u00b6 Fast \u03b5-free Inference of Simulation Models with Bayesian Conditional Density Estimation by Papamakarios & Murray (NeurIPS 2016) [PDF] [BibTeX] Flexible statistical inference for mechanistic models of neural dynamics by Lueckmann, Goncalves, Bassetto, \u00d6cal, Nonnenmacher & Macke (NeurIPS 2017) [PDF] [BibTeX] Automatic posterior transformation for likelihood-free inference by Greenberg, Nonnenmacher & Macke (ICML 2019) [PDF] [BibTeX] Truncated proposals for scalable and hassle-free simulation-based inference by Deistler, Goncalves & Macke (NeurIPS 2022) [Paper] Likelihood-estimation ( (S)NLE ) \u00b6 Sequential neural likelihood: Fast likelihood-free inference with autoregressive flows by Papamakarios, Sterratt & Murray (AISTATS 2019) [PDF] [BibTeX] Variational methods for simulation-based inference by Gl\u00f6ckler, Deistler, Macke (ICLR 2022) [Paper] Flexible and efficient simulation-based inference for models of decision-making by Boelts, Lueckmann, Gao, Macke (Elife 2022) [Paper] Likelihood-ratio-estimation ( (S)NRE ) \u00b6 Likelihood-free MCMC with Amortized Approximate Likelihood Ratios by Hermans, Begy & Louppe (ICML 2020) [PDF] On Contrastive Learning for Likelihood-free Inference Durkan, Murray & Papamakarios (ICML 2020) [PDF] Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation by Delaunoy, Hermans, Rozet, Wehenkel & Louppe (NeurIPS 2022) [PDF] Contrastive Neural Ratio Estimation Benjamin Kurt Miller, Christoph Weniger, Patrick Forr\u00e9 (NeurIPS 2022) [PDF] Utilities \u00b6 Restriction estimator by Deistler, Macke & Goncalves (PNAS 2022) [Paper] Simulation-based calibration by Talts, Betancourt, Simpson, Vehtari, Gelman (arxiv 2018) [Paper] ) Expected coverage (sample-based) as computed in Deistler, Goncalves, Macke [Paper] and in Rozet, Louppe [Paper]","title":"Home"},{"location":"#sbi-simulation-based-inference","text":"sbi : A Python toolbox for simulation-based inference. Inference can be run in a single line of code: posterior = infer ( simulator , prior , method = 'SNPE' , num_simulations = 1000 ) and you can choose from a variety of amortized and sequential SBI methods. Amortized methods return a posterior that can be applied to many different observations without retraining, whereas sequential methods focus the inference on one particular observation to be more simulation-efficient. For an overview of implemented methods see below, or checkout or GitHub page .","title":"sbi: simulation-based inference"},{"location":"#overview","text":"To learn about the general motivation behind simulation-based inference, and the inference methods included in sbi , read on below. 
For example applications to canonical problems in neuroscience, browse the recent research article Training deep neural density estimators to identify mechanistic models of neural dynamics . If you want to get started using sbi on your own problem, jump to installation and then check out the tutorial .","title":"Overview"},{"location":"#motivation-and-approach","text":"Many areas of science and engineering make extensive use of complex, stochastic, numerical simulations to describe the structure and dynamics of the processes being investigated. A key challenge in simulation-based science is constraining these simulation models\u2019 parameters, which are intepretable quantities, with observational data. Bayesian inference provides a general and powerful framework to invert the simulators, i.e. describe the parameters which are consistent both with empirical data and prior knowledge. In the case of simulators, a key quantity required for statistical inference, the likelihood of observed data given parameters, \\(\\mathcal{L}(\\theta) = p(x_o|\\theta)\\) , is typically intractable, rendering conventional statistical approaches inapplicable. sbi implements powerful machine-learning methods that address this problem. Roughly, these algorithms can be categorized as: Neural Posterior Estimation (amortized NPE and sequential SNPE ), Neural Likelihood Estimation ( (S)NLE ), and Neural Ratio Estimation ( (S)NRE ). Depending on the characteristics of the problem, e.g. the dimensionalities of the parameter space and the observation space, one of the methods will be more suitable. Goal: Algorithmically identify mechanistic models which are consistent with data. Each of the methods above needs three inputs: A candidate mechanistic model, prior knowledge or constraints on model parameters, and observational data (or summary statistics thereof). The methods then proceed by sampling parameters from the prior followed by simulating synthetic data from these parameters, learning the (probabilistic) association between data (or data features) and underlying parameters, i.e. to learn statistical inference from simulated data. The way in which this association is learned differs between the above methods, but all use deep neural networks. This learned neural network is then applied to empirical data to derive the full space of parameters consistent with the data and the prior, i.e. the posterior distribution. High posterior probability is assigned to parameters which are consistent with both the data and the prior, low probability to inconsistent parameters. While SNPE directly learns the posterior distribution, SNLE and SNRE need an extra MCMC sampling step to construct a posterior. If needed, an initial estimate of the posterior can be used to adaptively generate additional informative simulations.","title":"Motivation and approach"},{"location":"#publications","text":"See Cranmer, Brehmer, Louppe (2020) for a recent review on simulation-based inference. The following papers offer additional details on the inference methods implemented in sbi . 
You can find a tutorial on how to run each of these methods here .","title":"Publications"},{"location":"#posterior-estimation-snpe","text":"Fast \u03b5-free Inference of Simulation Models with Bayesian Conditional Density Estimation by Papamakarios & Murray (NeurIPS 2016) [PDF] [BibTeX] Flexible statistical inference for mechanistic models of neural dynamics by Lueckmann, Goncalves, Bassetto, \u00d6cal, Nonnenmacher & Macke (NeurIPS 2017) [PDF] [BibTeX] Automatic posterior transformation for likelihood-free inference by Greenberg, Nonnenmacher & Macke (ICML 2019) [PDF] [BibTeX] Truncated proposals for scalable and hassle-free simulation-based inference by Deistler, Goncalves & Macke (NeurIPS 2022) [Paper]","title":"Posterior estimation ((S)NPE)"},{"location":"#likelihood-estimation-snle","text":"Sequential neural likelihood: Fast likelihood-free inference with autoregressive flows by Papamakarios, Sterratt & Murray (AISTATS 2019) [PDF] [BibTeX] Variational methods for simulation-based inference by Gl\u00f6ckler, Deistler, Macke (ICLR 2022) [Paper] Flexible and efficient simulation-based inference for models of decision-making by Boelts, Lueckmann, Gao, Macke (Elife 2022) [Paper]","title":"Likelihood-estimation ((S)NLE)"},{"location":"#likelihood-ratio-estimation-snre","text":"Likelihood-free MCMC with Amortized Approximate Likelihood Ratios by Hermans, Begy & Louppe (ICML 2020) [PDF] On Contrastive Learning for Likelihood-free Inference Durkan, Murray & Papamakarios (ICML 2020) [PDF] Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation by Delaunoy, Hermans, Rozet, Wehenkel & Louppe (NeurIPS 2022) [PDF] Contrastive Neural Ratio Estimation Benjamin Kurt Miller, Christoph Weniger, Patrick Forr\u00e9 (NeurIPS 2022) [PDF]","title":"Likelihood-ratio-estimation ((S)NRE)"},{"location":"#utilities","text":"Restriction estimator by Deistler, Macke & Goncalves (PNAS 2022) [Paper] Simulation-based calibration by Talts, Betancourt, Simpson, Vehtari, Gelman (arxiv 2018) [Paper] ) Expected coverage (sample-based) as computed in Deistler, Goncalves, Macke [Paper] and in Rozet, Louppe [Paper]","title":"Utilities"},{"location":"citation/","text":"Citation \u00b6 If you use sbi consider citing the sbi software paper , in addition to the original research articles describing the specific sbi-algorithm(s) you are using. @article { tejero-cantero2020sbi, doi = { 10.21105/joss.02505 } , url = { https://doi.org/10.21105/joss.02505 } , year = { 2020 } , publisher = { The Open Journal } , volume = { 5 } , number = { 52 } , pages = { 2505 } , author = { Alvaro Tejero-Cantero and Jan Boelts and Michael Deistler and Jan-Matthis Lueckmann and Conor Durkan and Pedro J. Gon\u00e7alves and David S. Greenberg and Jakob H. Macke } , title = { sbi: A toolkit for simulation-based inference } , journal = { Journal of Open Source Software } } The above citation refers to the original version of the sbi project and has a persistent DOI. Additionally, new releases of sbi are citable via Zenodo , where we create a new DOI for every release.","title":"Citation"},{"location":"citation/#citation","text":"If you use sbi consider citing the sbi software paper , in addition to the original research articles describing the specific sbi-algorithm(s) you are using. 
@article { tejero-cantero2020sbi, doi = { 10.21105/joss.02505 } , url = { https://doi.org/10.21105/joss.02505 } , year = { 2020 } , publisher = { The Open Journal } , volume = { 5 } , number = { 52 } , pages = { 2505 } , author = { Alvaro Tejero-Cantero and Jan Boelts and Michael Deistler and Jan-Matthis Lueckmann and Conor Durkan and Pedro J. Gon\u00e7alves and David S. Greenberg and Jakob H. Macke } , title = { sbi: A toolkit for simulation-based inference } , journal = { Journal of Open Source Software } } The above citation refers to the original version of the sbi project and has a persistent DOI. Additionally, new releases of sbi are citable via Zenodo , where we create a new DOI for every release.","title":"Citation"},{"location":"code_of_conduct/","text":"Contributor Covenant Code of Conduct \u00b6 Our Pledge \u00b6 We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, or sexual identity and orientation. We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. Our Standards \u00b6 Examples of behavior that contributes to a positive environment for our community include: Demonstrating empathy and kindness toward other people Being respectful of differing opinions, viewpoints, and experiences Giving and gracefully accepting constructive feedback Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: The use of sexualized language or imagery, and sexual attention or advances of any kind Trolling, insulting or derogatory comments, and personal or political attacks Public or private harassment Publishing others\u2019 private information, such as a physical or email address, without their explicit permission Other conduct which could reasonably be considered inappropriate in a professional setting Enforcement Responsibilities \u00b6 Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. Scope \u00b6 This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Enforcement \u00b6 Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting sbi developer Jan Boelts via email ( jan.boelts@uni-tuebingen.de ). All complaints will be reviewed and investigated promptly and fairly. 
All community leaders are obligated to respect the privacy and security of the reporter of any incident. Enforcement Guidelines \u00b6 Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: 1. Correction \u00b6 Community Impact : Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. Consequence : A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested. 2. Warning \u00b6 Community Impact : A violation through a single incident or series of actions. Consequence : A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban. 3. Temporary Ban \u00b6 Community Impact : A serious violation of community standards, including sustained inappropriate behavior. Consequence : A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban. 4. Permanent Ban \u00b6 Community Impact : Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. Consequence : A permanent ban from any sort of public interaction within the community. Attribution \u00b6 This Code of Conduct is adapted from the Contributor Covenant , version 2.1, available at https://www.contributor-covenant.org/version/2/1/code_of_conduct.html . Community Impact Guidelines were inspired by Mozilla\u2019s code of conduct enforcement ladder . For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq . Translations are available at https://www.contributor-covenant.org/translations .","title":"Code of Conduct"},{"location":"code_of_conduct/#contributor-covenant-code-of-conduct","text":"","title":"Contributor Covenant Code of Conduct"},{"location":"code_of_conduct/#our-pledge","text":"We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, caste, color, religion, or sexual identity and orientation. 
We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community.","title":"Our Pledge"},{"location":"code_of_conduct/#our-standards","text":"Examples of behavior that contributes to a positive environment for our community include: Demonstrating empathy and kindness toward other people Being respectful of differing opinions, viewpoints, and experiences Giving and gracefully accepting constructive feedback Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience Focusing on what is best not just for us as individuals, but for the overall community Examples of unacceptable behavior include: The use of sexualized language or imagery, and sexual attention or advances of any kind Trolling, insulting or derogatory comments, and personal or political attacks Public or private harassment Publishing others\u2019 private information, such as a physical or email address, without their explicit permission Other conduct which could reasonably be considered inappropriate in a professional setting","title":"Our Standards"},{"location":"code_of_conduct/#enforcement-responsibilities","text":"Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate.","title":"Enforcement Responsibilities"},{"location":"code_of_conduct/#scope","text":"This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event.","title":"Scope"},{"location":"code_of_conduct/#enforcement","text":"Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting sbi developer Jan Boelts via email ( jan.boelts@uni-tuebingen.de ). All complaints will be reviewed and investigated promptly and fairly. All community leaders are obligated to respect the privacy and security of the reporter of any incident.","title":"Enforcement"},{"location":"code_of_conduct/#enforcement-guidelines","text":"Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct:","title":"Enforcement Guidelines"},{"location":"code_of_conduct/#1-correction","text":"Community Impact : Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. Consequence : A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested.","title":"1. Correction"},{"location":"code_of_conduct/#2-warning","text":"Community Impact : A violation through a single incident or series of actions. Consequence : A warning with consequences for continued behavior. 
No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban.","title":"2. Warning"},{"location":"code_of_conduct/#3-temporary-ban","text":"Community Impact : A serious violation of community standards, including sustained inappropriate behavior. Consequence : A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.","title":"3. Temporary Ban"},{"location":"code_of_conduct/#4-permanent-ban","text":"Community Impact : Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. Consequence : A permanent ban from any sort of public interaction within the community.","title":"4. Permanent Ban"},{"location":"code_of_conduct/#attribution","text":"This Code of Conduct is adapted from the Contributor Covenant , version 2.1, available at https://www.contributor-covenant.org/version/2/1/code_of_conduct.html . Community Impact Guidelines were inspired by Mozilla\u2019s code of conduct enforcement ladder . For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq . Translations are available at https://www.contributor-covenant.org/translations .","title":"Attribution"},{"location":"contribute/","text":"User experiences, bugs, and feature requests \u00b6 If you are using sbi to infer the parameters of a simulator, we would be delighted to know how it worked for you. If it didn\u2019t work according to plan, please open up an issue and tell us more about your use case: the dimensionality of the input parameters and of the output, as well as the setup you used to run inference (i.e. number of simulations, number of rounds,\u2026). To report bugs and suggest features (including better documentation), please equally head over to issues on GitHub . Code contributions \u00b6 Contributions to the sbi package are welcome! In general, we use pull requests to make changes to sbi . So, if you are planning to make a contribution, please fork, create a feature branch and then make a PR from your feature branch to the upstream sbi ( details ). To give credits to contributors, we consider adding contributors who repeatedly and substantially contributed to sbi to the list of authors of the package at the end of every year. Additionally, we mention all contributors in the releases. Development environment \u00b6 Clone the repo and install all the dependencies using the environment.yml file to create a conda environment: conda env create -f environment.yml . If you already have an sbi environment and want to refresh dependencies, just run conda env update -f environment.yml --prune . Alternatively, you can install via setup.py using pip install -e \".[dev]\" (the dev flag installs development and testing dependencies). Style conventions \u00b6 For docstrings and comments, we use Google Style . 
Code needs to pass through the following tools, which are installed alongside sbi : black : Automatic code formatting for Python. You can run black manually from the console using black . in the top directory of the repository, which will format all files. isort : Used to consistently order imports. You can run isort manually from the console using isort in the top directory. pyright : Used for static type checking. black and isort and pyright are checked as part of our CI actions. If these checks fail please make sure you have installed the latest versions for each of them and run them locally. Online documentation \u00b6 Most of the documentation is written in markdown ( basic markdown guide ). You can directly fix mistakes and suggest clearer formulations in markdown files simply by initiating a PR on through GitHub. Click on documentation file and look for the little pencil at top right.","title":"Guide"},{"location":"contribute/#user-experiences-bugs-and-feature-requests","text":"If you are using sbi to infer the parameters of a simulator, we would be delighted to know how it worked for you. If it didn\u2019t work according to plan, please open up an issue and tell us more about your use case: the dimensionality of the input parameters and of the output, as well as the setup you used to run inference (i.e. number of simulations, number of rounds,\u2026). To report bugs and suggest features (including better documentation), please equally head over to issues on GitHub .","title":"User experiences, bugs, and feature requests"},{"location":"contribute/#code-contributions","text":"Contributions to the sbi package are welcome! In general, we use pull requests to make changes to sbi . So, if you are planning to make a contribution, please fork, create a feature branch and then make a PR from your feature branch to the upstream sbi ( details ). To give credits to contributors, we consider adding contributors who repeatedly and substantially contributed to sbi to the list of authors of the package at the end of every year. Additionally, we mention all contributors in the releases.","title":"Code contributions"},{"location":"contribute/#development-environment","text":"Clone the repo and install all the dependencies using the environment.yml file to create a conda environment: conda env create -f environment.yml . If you already have an sbi environment and want to refresh dependencies, just run conda env update -f environment.yml --prune . Alternatively, you can install via setup.py using pip install -e \".[dev]\" (the dev flag installs development and testing dependencies).","title":"Development environment"},{"location":"contribute/#style-conventions","text":"For docstrings and comments, we use Google Style . Code needs to pass through the following tools, which are installed alongside sbi : black : Automatic code formatting for Python. You can run black manually from the console using black . in the top directory of the repository, which will format all files. isort : Used to consistently order imports. You can run isort manually from the console using isort in the top directory. pyright : Used for static type checking. black and isort and pyright are checked as part of our CI actions. If these checks fail please make sure you have installed the latest versions for each of them and run them locally.","title":"Style conventions"},{"location":"contribute/#online-documentation","text":"Most of the documentation is written in markdown ( basic markdown guide ). 
You can directly fix mistakes and suggest clearer formulations in markdown files simply by initiating a PR on through GitHub. Click on documentation file and look for the little pencil at top right.","title":"Online documentation"},{"location":"credits/","text":"Credits \u00b6 License \u00b6 sbi is licensed under the Affero General Public License version 3 (AGPLv3) and Copyright (C) 2020 \u00c1lvaro Tejero-Cantero, Jakob H. Macke, Jan-Matthis L\u00fcckmann, Michael Deistler, Jan F. B\u00f6lts. Copyright (C) 2020 Conor M. Durkan. Support \u00b6 sbi has been supported by the German Federal Ministry of Education and Research (BMBF) through the project ADIMEM, FKZ 01IS18052 A-D). ADIMEM is a collaborative project between the groups of Jakob Macke (Uni T\u00fcbingen), Philipp Berens (Uni T\u00fcbingen), Philipp Hennig (Uni T\u00fcbingen) and Marcel Oberlaender (caesar Bonn) which aims to develop inference methods for mechanistic models. Important dependencies and prior art \u00b6 sbi is the successor to delfi , a Theano-based toolbox for sequential neural posterior estimation developed at mackelab . If you were using delfi , we strongly recommend to move your inference over to sbi . Please open issues if you find unexpected behaviour or missing features. We will consider these bugs and give them priority. sbi as a PyTorch-based toolbox started as a fork of conormdurkan/lfi , by Conor M.Durkan . sbi uses density estimators from bayesiains/nflows by Conor M.Durkan , George Papamakarios and Artur Bekasov . These are proxied through pyknos , a package focused on density estimation. sbi uses PyTorch and tries to align with the interfaces (e.g. for probability distributions) adopted by PyTorch . See README.md for a list of publications describing the methods implemented in sbi .","title":"Credits"},{"location":"credits/#credits","text":"","title":"Credits"},{"location":"credits/#license","text":"sbi is licensed under the Affero General Public License version 3 (AGPLv3) and Copyright (C) 2020 \u00c1lvaro Tejero-Cantero, Jakob H. Macke, Jan-Matthis L\u00fcckmann, Michael Deistler, Jan F. B\u00f6lts. Copyright (C) 2020 Conor M. Durkan.","title":"License"},{"location":"credits/#support","text":"sbi has been supported by the German Federal Ministry of Education and Research (BMBF) through the project ADIMEM, FKZ 01IS18052 A-D). ADIMEM is a collaborative project between the groups of Jakob Macke (Uni T\u00fcbingen), Philipp Berens (Uni T\u00fcbingen), Philipp Hennig (Uni T\u00fcbingen) and Marcel Oberlaender (caesar Bonn) which aims to develop inference methods for mechanistic models.","title":"Support"},{"location":"credits/#important-dependencies-and-prior-art","text":"sbi is the successor to delfi , a Theano-based toolbox for sequential neural posterior estimation developed at mackelab . If you were using delfi , we strongly recommend to move your inference over to sbi . Please open issues if you find unexpected behaviour or missing features. We will consider these bugs and give them priority. sbi as a PyTorch-based toolbox started as a fork of conormdurkan/lfi , by Conor M.Durkan . sbi uses density estimators from bayesiains/nflows by Conor M.Durkan , George Papamakarios and Artur Bekasov . These are proxied through pyknos , a package focused on density estimation. sbi uses PyTorch and tries to align with the interfaces (e.g. for probability distributions) adopted by PyTorch . 
See README.md for a list of publications describing the methods implemented in sbi .","title":"Important dependencies and prior art"},{"location":"faq/","text":"Frequently asked questions \u00b6 Can the algorithms deal with invalid data, e.g. NaN or inf? What should I do when my \u2018posterior samples are outside of the prior support\u2019 in SNPE? When using multiple workers, I get a pickling error. Can I still use multiprocessing? Can I use the GPU for training the density estimator? How should I save and load objects in sbi ? Can I stop neural network training and resume it later? How can I use a prior that is not defined in PyTorch?","title":"FAQ"},{"location":"faq/#frequently-asked-questions","text":"Can the algorithms deal with invalid data, e.g. NaN or inf? What should I do when my \u2018posterior samples are outside of the prior support\u2019 in SNPE? When using multiple workers, I get a pickling error. Can I still use multiprocessing? Can I use the GPU for training the density estimator? How should I save and load objects in sbi ? Can I stop neural network training and resume it later? How can I use a prior that is not defined in PyTorch?","title":"Frequently asked questions"},{"location":"install/","text":"Installation \u00b6 sbi requires Python 3.6 or higher. We recommend to use a conda virtual environment ( Miniconda installation instructions ). If conda is installed on the system, an environment for installing sbi can be created as follows: # Create an environment for sbi (indicate Python 3.6 or higher); activate it $ conda create -n sbi_env python=3.7 && conda activate sbi_env Independent of whether you are using conda or not, sbi can be installed using pip : $ pip install sbi To test the installation, drop into a python prompt and run from sbi.examples.minimal import simple posterior = simple () print ( posterior )","title":"Installation"},{"location":"install/#installation","text":"sbi requires Python 3.6 or higher. We recommend to use a conda virtual environment ( Miniconda installation instructions ). If conda is installed on the system, an environment for installing sbi can be created as follows: # Create an environment for sbi (indicate Python 3.6 or higher); activate it $ conda create -n sbi_env python=3.7 && conda activate sbi_env Independent of whether you are using conda or not, sbi can be installed using pip : $ pip install sbi To test the installation, drop into a python prompt and run from sbi.examples.minimal import simple posterior = simple () print ( posterior )","title":"Installation"},{"location":"reference/","text":"API Reference \u00b6 Inference \u00b6 sbi . inference . base . infer ( simulator , prior , method , num_simulations , num_workers = 1 ) \u00b6 Runs simulation-based inference and returns the posterior. This function provides a simple interface to run sbi. Inference is run for a single round and hence the returned posterior \\(p(\\theta|x)\\) can be sampled and evaluated for any \\(x\\) (i.e. it is amortized). The scope of this function is limited to the most essential features of sbi. For more flexibility (e.g. multi-round inference, different density estimators) please use the flexible interface described here: https://www.mackelab.org/sbi/tutorial/02_flexible_interface/ Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\mathrm{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. 
function or class with __call__ method) can be used. required prior Distribution A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. required method str What inference method to use. Either of SNPE, SNLE or SNRE. required num_simulations int Number of simulation calls. More simulations means a longer runtime, but a better posterior estimate. required num_workers int Number of parallel workers to use for simulations. 1 Returns: Posterior over parameters conditional on observations (amortized). Source code in sbi/inference/base.py def infer ( simulator : Callable , prior : Distribution , method : str , num_simulations : int , num_workers : int = 1 , ) -> NeuralPosterior : r \"\"\"Runs simulation-based inference and returns the posterior. This function provides a simple interface to run sbi. Inference is run for a single round and hence the returned posterior $p(\\theta|x)$ can be sampled and evaluated for any $x$ (i.e. it is amortized). The scope of this function is limited to the most essential features of sbi. For more flexibility (e.g. multi-round inference, different density estimators) please use the flexible interface described here: https://www.mackelab.org/sbi/tutorial/02_flexible_interface/ Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\mathrm{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. method: What inference method to use. Either of SNPE, SNLE or SNRE. num_simulations: Number of simulation calls. More simulations means a longer runtime, but a better posterior estimate. num_workers: Number of parallel workers to use for simulations. Returns: Posterior over parameters conditional on observations (amortized). \"\"\" try : method_fun : Callable = getattr ( sbi . inference , method . upper ()) except AttributeError : raise NameError ( \"Method not available. `method` must be one of 'SNPE', 'SNLE', 'SNRE'.\" ) simulator , prior = prepare_for_sbi ( simulator , prior ) inference = method_fun ( prior = prior ) theta , x = simulate_for_sbi ( simulator = simulator , proposal = prior , num_simulations = num_simulations , num_workers = num_workers , ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () return posterior sbi . utils . user_input_checks . prepare_for_sbi ( simulator , prior ) \u00b6 Prepare simulator and prior for usage in sbi. NOTE: This is a wrapper around process_prior and process_simulator which can be used in isolation as well. Attempts to meet the following requirements by reshaping and type-casting: the simulator function receives as input and returns a Tensor. the simulator can simulate batches of parameters and return batches of data. the prior does not produce batches and samples and evaluates to Tensor. the output shape is a torch.Size((1,N)) (i.e, has a leading batch dimension 1). If this is not possible, a suitable exception will be raised. Parameters: Name Type Description Default simulator Callable Simulator as provided by the user. required prior Prior as provided by the user. 
required Returns: Type Description Tuple[Callable, torch.distributions.distribution.Distribution] Tuple (simulator, prior) checked and matching the requirements of sbi. Source code in sbi/utils/user_input_checks.py def prepare_for_sbi ( simulator : Callable , prior ) -> Tuple [ Callable , Distribution ]: \"\"\"Prepare simulator and prior for usage in sbi. NOTE: This is a wrapper around `process_prior` and `process_simulator` which can be used in isolation as well. Attempts to meet the following requirements by reshaping and type-casting: - the simulator function receives as input and returns a Tensor.
    - the simulator can simulate batches of parameters and return batches of data.
    - the prior does not produce batches; sampling and evaluating it returns Tensors.
    - the output shape is a `torch.Size((1,N))` (i.e, has a leading batch dimension 1). If this is not possible, a suitable exception will be raised. Args: simulator: Simulator as provided by the user. prior: Prior as provided by the user. Returns: Tuple (simulator, prior) checked and matching the requirements of sbi. \"\"\" # Check prior, return PyTorch prior. prior , _ , prior_returns_numpy = process_prior ( prior ) # Check simulator, returns PyTorch simulator able to simulate batches. simulator = process_simulator ( simulator , prior , prior_returns_numpy ) # Consistency check after making ready for sbi. check_sbi_inputs ( simulator , prior ) return simulator , prior sbi . inference . base . simulate_for_sbi ( simulator , proposal , num_simulations , num_workers = 1 , simulation_batch_size = 1 , seed = None , show_progress_bar = True ) \u00b6 Returns ( \\(\\theta, x\\) ) pairs obtained from sampling the proposal and simulating. This function performs two steps: Sample parameters \\(\\theta\\) from the proposal . Simulate these parameters to obtain \\(x\\) . Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\text{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required proposal Any Probability distribution that the parameters \\(\\theta\\) are sampled from. required num_simulations int Number of simulations that are run. required num_workers int Number of parallel workers to use for simulations. 1 simulation_batch_size int Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 1 seed Optional[int] Seed for reproducibility. None show_progress_bar bool Whether to show a progress bar for simulating. This will not affect whether there will be a progressbar while drawing samples from the proposal. True Returns: Sampled parameters \\(\\theta\\) and simulation-outputs \\(x\\) . Source code in sbi/inference/base.py def simulate_for_sbi ( simulator : Callable , proposal : Any , num_simulations : int , num_workers : int = 1 , simulation_batch_size : int = 1 , seed : Optional [ int ] = None , show_progress_bar : bool = True , ) -> Tuple [ Tensor , Tensor ]: r \"\"\"Returns ($\\theta, x$) pairs obtained from sampling the proposal and simulating. This function performs two steps: - Sample parameters $\\theta$ from the `proposal`. - Simulate these parameters to obtain $x$. Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\text{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. proposal: Probability distribution that the parameters $\\theta$ are sampled from. num_simulations: Number of simulations that are run. num_workers: Number of parallel workers to use for simulations. simulation_batch_size: Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). seed: Seed for reproducibility. show_progress_bar: Whether to show a progress bar for simulating. This will not affect whether there will be a progressbar while drawing samples from the proposal. 
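For orientation, a minimal usage sketch of `prepare_for_sbi` and `simulate_for_sbi` (the Gaussian-noise toy simulator and the `BoxUniform` prior below are illustrative assumptions, not part of the API):

```python
import torch

from sbi.inference import prepare_for_sbi, simulate_for_sbi
from sbi.utils import BoxUniform


def simulator(theta):
    # Toy simulator: the observation is the parameter plus Gaussian noise.
    return theta + 0.1 * torch.randn_like(theta)


prior = BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))

# Reshape and type-cast simulator and prior to meet the requirements listed above.
simulator, prior = prepare_for_sbi(simulator, prior)

# Sample 1000 parameter sets from the prior and simulate them with 4 workers.
theta, x = simulate_for_sbi(simulator, proposal=prior, num_simulations=1000, num_workers=4)
print(theta.shape, x.shape)  # expected: torch.Size([1000, 3]) torch.Size([1000, 3])
```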
Returns: Sampled parameters $\\theta$ and simulation-outputs $x$. \"\"\" theta = proposal . sample (( num_simulations ,)) x = simulate_in_batches ( simulator = simulator , theta = theta , sim_batch_size = simulation_batch_size , num_workers = num_workers , seed = seed , show_progress_bars = show_progress_bar , ) return theta , x sbi.inference.snpe.snpe_a.SNPE_A ( PosteriorEstimator ) \u00b6 __init__ ( self , prior = None , density_estimator = 'mdn_snpe_a' , num_components = 10 , device = 'cpu' , logging_level = 'WARNING' , summary_writer = None , show_progress_bars = True ) special \u00b6 SNPE-A [1]. [1] Fast epsilon-free Inference of Simulation Models with Bayesian Conditional Density Estimation , Papamakarios et al., NeurIPS 2016, https://arxiv.org/abs/1605.06376 . This class implements SNPE-A. SNPE-A trains across multiple rounds with a maximum-likelihood-loss. This will make training converge to the proposal posterior instead of the true posterior. To correct for this, SNPE-A applies a post-hoc correction after training. This correction has to be performed analytically. Thus, SNPE-A is limited to Gaussian distributions for all but the last round. In the last round, SNPE-A can use a Mixture of Gaussians. Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. None density_estimator Union[str, Callable] If it is a string (only \u201cmdn_snpe_a\u201d is valid), use a pre-configured mixture of densities network. Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the density estimator. The density estimator needs to provide the methods .log_prob and .sample() . Note that until the last round only a single (multivariate) Gaussian component is used for training (see Algorithm 1 in [1]). In the last round, this component is replicated num_components times, its parameters are perturbed with a very small noise, and then the last training round is done with the expanded Gaussian mixture as estimator for the proposal posterior. 'mdn_snpe_a' num_components int Number of components of the mixture of Gaussians in the last round. This overrides the num_components value passed to posterior_nn() . 10 device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'WARNING' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during training. True Source code in sbi/inference/snpe/snpe_a.py def __init__ ( self , prior : Optional [ Distribution ] = None , density_estimator : Union [ str , Callable ] = \"mdn_snpe_a\" , num_components : int = 10 , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"WARNING\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"SNPE-A [1]. 
[1] _Fast epsilon-free Inference of Simulation Models with Bayesian Conditional Density Estimation_, Papamakarios et al., NeurIPS 2016, https://arxiv.org/abs/1605.06376. This class implements SNPE-A. SNPE-A trains across multiple rounds with a maximum-likelihood-loss. This will make training converge to the proposal posterior instead of the true posterior. To correct for this, SNPE-A applies a post-hoc correction after training. This correction has to be performed analytically. Thus, SNPE-A is limited to Gaussian distributions for all but the last round. In the last round, SNPE-A can use a Mixture of Gaussians. Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. density_estimator: If it is a string (only \"mdn_snpe_a\" is valid), use a pre-configured mixture of densities network. Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the density estimator. The density estimator needs to provide the methods `.log_prob` and `.sample()`. Note that until the last round only a single (multivariate) Gaussian component is used for training (see Algorithm 1 in [1]). In the last round, this component is replicated `num_components` times, its parameters are perturbed with a very small noise, and then the last training round is done with the expanded Gaussian mixture as estimator for the proposal posterior. num_components: Number of components of the mixture of Gaussians in the last round. This overrides the `num_components` value passed to `posterior_nn()`. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during training. \"\"\" # Catch invalid inputs. if not (( density_estimator == \"mdn_snpe_a\" ) or callable ( density_estimator )): raise TypeError ( \"The `density_estimator` passed to SNPE_A needs to be a \" \"callable or the string 'mdn_snpe_a'!\" ) # `num_components` will be used to replicate the Gaussian in the last round. self . _num_components = num_components self . _ran_final_round = False # WARNING: sneaky trick ahead. We proxy the parent's `train` here, # requiring the signature to have `num_atoms`, save it for use below, and # continue. It's sneaky because we are using the object (self) as a namespace # to pass arguments between functions, and that's implicit state management. kwargs = utils . del_entries ( locals (), entries = ( \"self\" , \"__class__\" , \"num_components\" ), ) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , proposal = None , exclude_invalid_x = None , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. 
Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required proposal Optional[sbi.inference.posteriors.direct_posterior.DirectPosterior] The distribution that the parameters \\(\\theta\\) were sampled from. Pass None if the parameters were sampled from the prior. If not None , it will trigger a different loss-function. None exclude_invalid_x Optional[bool] Whether invalid simulations are discarded during training. For single-round SNPE, it is fine to discard invalid simulations, but for multi-round SNPE (atomic), discarding invalid simulations gives systematically wrong results. If None , it will be True in the first round and False in later rounds. None data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description PosteriorEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snpe/snpe_a.py def append_simulations ( self , theta : Tensor , x : Tensor , proposal : Optional [ DirectPosterior ] = None , exclude_invalid_x : Optional [ bool ] = None , data_device : Optional [ str ] = None , ) -> \"PosteriorEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. proposal: The distribution that the parameters $\\theta$ were sampled from. Pass `None` if the parameters were sampled from the prior. If not `None`, it will trigger a different loss-function. exclude_invalid_x: Whether invalid simulations are discarded during training. For single-round SNPE, it is fine to discard invalid simulations, but for multi-round SNPE (atomic), discarding invalid simulations gives systematically wrong results. If `None`, it will be `True` in the first round and `False` in later rounds. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" if ( proposal is None or proposal is self . _prior or ( isinstance ( proposal , RestrictedPrior ) and proposal . _prior is self . _prior ) ): # The `_data_round_index` will later be used to infer if one should train # with MLE loss or with atomic loss (see, in `train()`: # self._round = max(self._data_round_index)) current_round = 0 else : if not self . _data_round_index : # This catches a pretty specific case: if, in the first round, one # passes data that does not come from the prior. current_round = 1 else : current_round = max ( self . _data_round_index ) + 1 if exclude_invalid_x is None : if current_round == 0 : exclude_invalid_x = True else : exclude_invalid_x = False if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . 
_device ) is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x = exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) if ( type ( self ) . __name__ == \"SNPE_C\" and current_round > 0 and not self . use_non_atomic_loss ): nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"Multiround SNPE-C (atomic)\" , ) else : npe_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"Single-round NPE\" ) self . _check_proposal ( proposal ) self . _data_round_index . append ( current_round ) prior_masks = mask_sims_from_prior ( int ( current_round > 0 ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _proposal_roundwise . append ( proposal ) if self . _prior is None or isinstance ( self . _prior , ImproperEmpirical ): if proposal is not None : raise ValueError ( \"You had not passed a prior at initialization, but now you \" \"passed a proposal. If you want to run multi-round SNPE, you have \" \"to specify a prior (set the `.prior` argument or re-initialize \" \"the object with a prior distribution). If the samples you passed \" \"to `append_simulations()` were sampled from the prior, you can \" \"run single-round inference with \" \"`append_simulations(..., proposal=None)`.\" ) theta_prior = self . get_simulations ()[ 0 ] . to ( self . _device ) self . _prior = ImproperEmpirical ( theta_prior , ones ( theta_prior . shape [ 0 ], device = self . _device ) ) return self build_posterior ( self , density_estimator = None , prior = None ) \u00b6 Build posterior from the neural density estimator. This method first corrects the estimated density with correct_for_proposal and then returns a DirectPosterior . Parameters: Name Type Description Default density_estimator Optional[Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None Returns: Type Description DirectPosterior Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods. Source code in sbi/inference/snpe/snpe_a.py def build_posterior ( self , density_estimator : Optional [ TorchModule ] = None , prior : Optional [ Distribution ] = None , ) -> \"DirectPosterior\" : r \"\"\"Build posterior from the neural density estimator. This method first corrects the estimated density with `correct_for_proposal` and then returns a `DirectPosterior`. Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods. \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNPE_A(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior wrapped_density_estimator = self . correct_for_proposal ( density_estimator = density_estimator ) self . _posterior = DirectPosterior ( posterior_estimator = wrapped_density_estimator , # type: ignore prior = prior , ) return deepcopy ( self . _posterior ) correct_for_proposal ( self , density_estimator = None ) \u00b6 Build mixture of Gaussians that approximates the posterior. 
Returns a SNPE_A_MDN object, which applies the posthoc-correction required in SNPE-A. Parameters: Name Type Description Default density_estimator Optional[Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None Returns: Type Description SNPE_A_MDN Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods. Source code in sbi/inference/snpe/snpe_a.py def correct_for_proposal ( self , density_estimator : Optional [ TorchModule ] = None , ) -> \"SNPE_A_MDN\" : r \"\"\"Build mixture of Gaussians that approximates the posterior. Returns a `SNPE_A_MDN` object, which applies the posthoc-correction required in SNPE-A. Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods. \"\"\" if density_estimator is None : density_estimator = deepcopy ( self . _neural_net ) # PosteriorEstimator.train() also returns a deepcopy, mimic this here # If internal net is used device is defined. device = self . _device else : # Otherwise, infer it from the device of the net parameters. device = str ( next ( density_estimator . parameters ()) . device ) # Set proposal of the density estimator. # This also evokes the z-scoring correction if necessary. if ( self . _proposal_roundwise [ - 1 ] is self . _prior or self . _proposal_roundwise [ - 1 ] is None ): proposal = self . _prior assert isinstance ( proposal , ( MultivariateNormal , utils . BoxUniform ) ), \"\"\"Prior must be `torch.distributions.MultivariateNormal` or `sbi.utils. BoxUniform`\"\"\" else : assert isinstance ( self . _proposal_roundwise [ - 1 ], DirectPosterior ), \"\"\"The proposal you passed to `append_simulations` is neither the prior nor a `DirectPosterior`. SNPE-A currently only supports these scenarios. \"\"\" proposal = self . _proposal_roundwise [ - 1 ] # Create the SNPE_A_MDN wrapped_density_estimator = SNPE_A_MDN ( flow = density_estimator , # type: ignore proposal = proposal , prior = self . _prior , device = device , ) return wrapped_density_estimator get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snpe/snpe_a.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. 
resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. Source code in sbi/inference/snpe/snpe_a.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . 
_data_round_index , starting_round ) return theta , x , prior_masks train ( self , final_round = False , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , calibration_kernel = None , resume_training = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None , component_perturbation = 0.005 ) \u00b6 Return density estimator that approximates the proposal posterior. [1] Fast epsilon-free Inference of Simulation Models with Bayesian Conditional Density Estimation , Papamakarios et al., NeurIPS 2016, https://arxiv.org/abs/1605.06376 . Training is performed with maximum likelihood on samples from the latest round, which leads the algorithm to converge to the proposal posterior. Parameters: Name Type Description Default final_round bool Whether we are in the last round of training or not. For all but the last round, Algorithm 1 from [1] is executed. In last the round, Algorithm 2 from [1] is executed once. False training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 calibration_kernel Optional[Callable] A function to calibrate the loss with respect to the simulations x . See Lueckmann, Gon\u00e7alves et al., NeurIPS 2017. None resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False force_first_round_loss If True , train with maximum likelihood, i.e., potentially ignoring the correction for using a proposal distribution different from the prior. required retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. Not supported for SNPE-A. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None component_perturbation float The standard deviation applied to all weights and biases when, in the last round, the Mixture of Gaussians is build from a single Gaussian. This value can be problem-specific and also depends on the number of mixture components. 0.005 Returns: Type Description Module Density estimator that approximates the distribution \\(p(\\theta|x)\\) . 
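To make the two training regimes of SNPE-A concrete, here is a hedged two-round sketch (the toy simulator, prior, and observation `x_o` are assumptions for illustration, not part of the API):

```python
import torch

from sbi.inference import SNPE_A, prepare_for_sbi, simulate_for_sbi
from sbi.utils import BoxUniform


def simulator(theta):
    # Toy simulator used only for illustration.
    return theta + 0.1 * torch.randn_like(theta)


prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
simulator, prior = prepare_for_sbi(simulator, prior)
x_o = torch.zeros(1, 2)  # assumed placeholder observation

inference = SNPE_A(prior=prior, num_components=5)

# Round 1: Algorithm 1 from [1], a single-component Gaussian trained on prior samples.
theta, x = simulate_for_sbi(simulator, prior, num_simulations=500)
inference.append_simulations(theta, x).train()
proposal = inference.build_posterior().set_default_x(x_o)

# Round 2: Algorithm 2 from [1], expand to the mixture and apply the post-hoc correction.
theta, x = simulate_for_sbi(simulator, proposal, num_simulations=500)
inference.append_simulations(theta, x, proposal=proposal).train(final_round=True)
posterior = inference.build_posterior().set_default_x(x_o)
samples = posterior.sample((1000,))
```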
Source code in sbi/inference/snpe/snpe_a.py def train ( self , final_round : bool = False , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , calibration_kernel : Optional [ Callable ] = None , resume_training : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , component_perturbation : float = 5e-3 , ) -> nn . Module : r \"\"\"Return density estimator that approximates the proposal posterior. [1] _Fast epsilon-free Inference of Simulation Models with Bayesian Conditional Density Estimation_, Papamakarios et al., NeurIPS 2016, https://arxiv.org/abs/1605.06376. Training is performed with maximum likelihood on samples from the latest round, which leads the algorithm to converge to the proposal posterior. Args: final_round: Whether we are in the last round of training or not. For all but the last round, Algorithm 1 from [1] is executed. In last the round, Algorithm 2 from [1] is executed once. training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. calibration_kernel: A function to calibrate the loss with respect to the simulations `x`. See Lueckmann, Gon\u00e7alves et al., NeurIPS 2017. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. force_first_round_loss: If `True`, train with maximum likelihood, i.e., potentially ignoring the correction for using a proposal distribution different from the prior. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. Not supported for SNPE-A. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) component_perturbation: The standard deviation applied to all weights and biases when, in the last round, the Mixture of Gaussians is build from a single Gaussian. This value can be problem-specific and also depends on the number of mixture components. Returns: Density estimator that approximates the distribution $p(\\theta|x)$. \"\"\" assert not retrain_from_scratch , \"\"\"Retraining from scratch is not supported in SNPE-A yet. The reason for this is that, if we reininitialized the density estimator, the z-scoring would change, which would break the posthoc correction. This is a pure implementation issue.\"\"\" kwargs = utils . del_entries ( locals (), entries = ( \"self\" , \"__class__\" , \"final_round\" , \"component_perturbation\" , ), ) # SNPE-A always discards the prior samples. 
kwargs [ \"discard_prior_samples\" ] = True kwargs [ \"force_first_round_loss\" ] = True self . _round = max ( self . _data_round_index ) if final_round : # If there is (will be) only one round, train with Algorithm 2 from [1]. if self . _round == 0 : self . _build_neural_net = partial ( self . _build_neural_net , num_components = self . _num_components ) # Run Algorithm 2 from [1]. elif not self . _ran_final_round : # Now switch to the specified number of components. This method will # only be used if `retrain_from_scratch=True`. Otherwise, # the MDN will be built from replicating the single-component net for # `num_component` times (via `_expand_mog()`). self . _build_neural_net = partial ( self . _build_neural_net , num_components = self . _num_components ) # Extend the MDN to the originally desired number of components. self . _expand_mog ( eps = component_perturbation ) else : warnings . warn ( \"You have already run SNPE-A with `final_round=True`. Running it\" \"again with this setting will not allow computing the posthoc\" \"correction applied in SNPE-A. Thus, you will get an error when \" \"calling `.build_posterior()` after training.\" , UserWarning , ) else : # Run Algorithm 1 from [1]. # Wrap the function that builds the MDN such that we can make # sure that there is only one component when running. self . _build_neural_net = partial ( self . _build_neural_net , num_components = 1 ) if final_round : self . _ran_final_round = True return super () . train ( ** kwargs ) sbi.inference.snpe.snpe_c.SNPE_C ( PosteriorEstimator ) \u00b6 __init__ ( self , prior = None , density_estimator = 'maf' , device = 'cpu' , logging_level = 'WARNING' , summary_writer = None , show_progress_bars = True ) special \u00b6 SNPE-C / APT [1]. [1] Automatic Posterior Transformation for Likelihood-free Inference , Greenberg et al., ICML 2019, https://arxiv.org/abs/1905.07488 . This class implements two loss variants of SNPE-C: the non-atomic and the atomic version. The atomic loss of SNPE-C can be used for any density estimator, i.e. also for normalizing flows. However, it suffers from leakage issues. On the other hand, the non-atomic loss can only be used only if the proposal distribution is a mixture of Gaussians, the density estimator is a mixture of Gaussians, and the prior is either Gaussian or Uniform. It does not suffer from leakage issues. At the beginning of each round, we print whether the non-atomic or the atomic version is used. In this codebase, we will automatically switch to the non-atomic loss if the following criteria are fulfilled: - proposal is a DirectPosterior with density_estimator mdn , as built with utils.sbi.posterior_nn() . - the density estimator is a mdn , as built with utils.sbi.posterior_nn() . - isinstance(prior, MultivariateNormal) (from torch.distributions ) or isinstance(prior, sbi.utils.BoxUniform) Note that custom implementations of any of these densities (or estimators) will not trigger the non-atomic loss, and the algorithm will fall back onto using the atomic loss. Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. None density_estimator Union[str, Callable] If it is a string, use a pre-configured network of the provided type (one of nsf, maf, mdn, made). Alternatively, a function that builds a custom neural network can be provided. 
The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the density estimator. The density estimator needs to provide the methods .log_prob and .sample() . 'maf' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'WARNING' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during training. True Source code in sbi/inference/snpe/snpe_c.py def __init__ ( self , prior : Optional [ Distribution ] = None , density_estimator : Union [ str , Callable ] = \"maf\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"WARNING\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"SNPE-C / APT [1]. [1] _Automatic Posterior Transformation for Likelihood-free Inference_, Greenberg et al., ICML 2019, https://arxiv.org/abs/1905.07488. This class implements two loss variants of SNPE-C: the non-atomic and the atomic version. The atomic loss of SNPE-C can be used for any density estimator, i.e. also for normalizing flows. However, it suffers from leakage issues. On the other hand, the non-atomic loss can only be used only if the proposal distribution is a mixture of Gaussians, the density estimator is a mixture of Gaussians, and the prior is either Gaussian or Uniform. It does not suffer from leakage issues. At the beginning of each round, we print whether the non-atomic or the atomic version is used. In this codebase, we will automatically switch to the non-atomic loss if the following criteria are fulfilled:
    - the proposal is a `DirectPosterior` with an `mdn` density estimator, as built with `sbi.utils.posterior_nn()`.
    - the density estimator is an `mdn`, as built with `sbi.utils.posterior_nn()`.
    - `isinstance(prior, MultivariateNormal)` (from `torch.distributions`) or `isinstance(prior, sbi.utils.BoxUniform)` Note that custom implementations of any of these densities (or estimators) will not trigger the non-atomic loss, and the algorithm will fall back onto using the atomic loss. Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. density_estimator: If it is a string, use a pre-configured network of the provided type (one of nsf, maf, mdn, made). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the density estimator. The density estimator needs to provide the methods `.log_prob` and `.sample()`. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during training. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , proposal = None , exclude_invalid_x = None , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required proposal Optional[sbi.inference.posteriors.direct_posterior.DirectPosterior] The distribution that the parameters \\(\\theta\\) were sampled from. Pass None if the parameters were sampled from the prior. If not None , it will trigger a different loss-function. None exclude_invalid_x Optional[bool] Whether invalid simulations are discarded during training. For single-round SNPE, it is fine to discard invalid simulations, but for multi-round SNPE (atomic), discarding invalid simulations gives systematically wrong results. If None , it will be True in the first round and False in later rounds. None data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description PosteriorEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snpe/snpe_c.py def append_simulations ( self , theta : Tensor , x : Tensor , proposal : Optional [ DirectPosterior ] = None , exclude_invalid_x : Optional [ bool ] = None , data_device : Optional [ str ] = None , ) -> \"PosteriorEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. 
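As an illustration of the non-atomic criteria listed above, the following hedged sketch (the toy simulator and prior are assumptions) combines an `mdn` density estimator, a `BoxUniform` prior, and a `DirectPosterior` proposal, so the second round is expected to use the non-atomic loss:

```python
import torch

from sbi.inference import SNPE_C, prepare_for_sbi, simulate_for_sbi
from sbi.utils import BoxUniform
from sbi.utils.get_nn_models import posterior_nn


def simulator(theta):
    # Toy simulator used only for illustration.
    return theta + 0.1 * torch.randn_like(theta)


prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
simulator, prior = prepare_for_sbi(simulator, prior)

# An `mdn` estimator plus a uniform prior: two of the three criteria above.
inference = SNPE_C(prior=prior, density_estimator=posterior_nn(model="mdn"))

theta, x = simulate_for_sbi(simulator, prior, num_simulations=500)
inference.append_simulations(theta, x).train()

# A `DirectPosterior` proposal completes the criteria; the next round should
# report "Using SNPE-C with non-atomic loss".
proposal = inference.build_posterior().set_default_x(torch.zeros(1, 2))
theta, x = simulate_for_sbi(simulator, proposal, num_simulations=500)
inference.append_simulations(theta, x, proposal=proposal).train()
```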
Args: theta: Parameter sets. x: Simulation outputs. proposal: The distribution that the parameters $\\theta$ were sampled from. Pass `None` if the parameters were sampled from the prior. If not `None`, it will trigger a different loss-function. exclude_invalid_x: Whether invalid simulations are discarded during training. For single-round SNPE, it is fine to discard invalid simulations, but for multi-round SNPE (atomic), discarding invalid simulations gives systematically wrong results. If `None`, it will be `True` in the first round and `False` in later rounds. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" if ( proposal is None or proposal is self . _prior or ( isinstance ( proposal , RestrictedPrior ) and proposal . _prior is self . _prior ) ): # The `_data_round_index` will later be used to infer if one should train # with MLE loss or with atomic loss (see, in `train()`: # self._round = max(self._data_round_index)) current_round = 0 else : if not self . _data_round_index : # This catches a pretty specific case: if, in the first round, one # passes data that does not come from the prior. current_round = 1 else : current_round = max ( self . _data_round_index ) + 1 if exclude_invalid_x is None : if current_round == 0 : exclude_invalid_x = True else : exclude_invalid_x = False if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x = exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) if ( type ( self ) . __name__ == \"SNPE_C\" and current_round > 0 and not self . use_non_atomic_loss ): nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"Multiround SNPE-C (atomic)\" , ) else : npe_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"Single-round NPE\" ) self . _check_proposal ( proposal ) self . _data_round_index . append ( current_round ) prior_masks = mask_sims_from_prior ( int ( current_round > 0 ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _proposal_roundwise . append ( proposal ) if self . _prior is None or isinstance ( self . _prior , ImproperEmpirical ): if proposal is not None : raise ValueError ( \"You had not passed a prior at initialization, but now you \" \"passed a proposal. If you want to run multi-round SNPE, you have \" \"to specify a prior (set the `.prior` argument or re-initialize \" \"the object with a prior distribution). If the samples you passed \" \"to `append_simulations()` were sampled from the prior, you can \" \"run single-round inference with \" \"`append_simulations(..., proposal=None)`.\" ) theta_prior = self . get_simulations ()[ 0 ] . to ( self . _device ) self . _prior = ImproperEmpirical ( theta_prior , ones ( theta_prior . shape [ 0 ], device = self . 
_device ) ) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'rejection' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. For SNPE, the posterior distribution that is returned here implements the following functionality over the raw neural density estimator: - correct the calculation of the log probability such that it compensates for the leakage. - reject samples that lie outside of the prior bounds. - alternatively, if leakage is very high (which can happen for multi-round SNPE), sample from the posterior with MCMC. Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'rejection' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior or DirectPosterior . By default, DirectPosterior is used. Only if rejection_sampling_parameters contains proposal , a RejectionPosterior is instantiated. {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior, sbi.inference.posteriors.direct_posterior.DirectPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snpe/snpe_c.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"rejection\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior , DirectPosterior ]: r \"\"\"Build posterior from the neural density estimator. For SNPE, the posterior distribution that is returned here implements the following functionality over the raw neural density estimator: - correct the calculation of the log probability such that it compensates for the leakage. - reject samples that lie outside of the prior bounds. - alternatively, if leakage is very high (which can happen for multi-round SNPE), sample from the posterior with MCMC. Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. 
Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior` or `DirectPosterior`. By default, `DirectPosterior` is used. Only if `rejection_sampling_parameters` contains `proposal`, a `RejectionPosterior` is instantiated. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert self . _prior is not None , ( \"You did not pass a prior. You have to pass the prior either at \" \"initialization `inference = SNPE(prior)` or to \" \"`.build_posterior(prior=prior)`.\" ) prior = self . _prior else : utils . check_prior ( prior ) if density_estimator is None : posterior_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : posterior_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = posterior_estimator_based_potential ( posterior_estimator = posterior_estimator , prior = prior , x_o = None , ) if sample_with == \"rejection\" : if \"proposal\" in rejection_sampling_parameters . keys (): self . _posterior = RejectionPosterior ( potential_fn = potential_fn , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) else : self . _posterior = DirectPosterior ( posterior_estimator = posterior_estimator , # type: ignore prior = prior , x_shape = self . _x_shape , device = device , ) elif sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). 
None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snpe/snpe_c.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. Source code in sbi/inference/snpe/snpe_c.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. 
Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , num_atoms = 10 , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , calibration_kernel = None , resume_training = False , force_first_round_loss = False , discard_prior_samples = False , use_combined_loss = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) \u00b6 Return density estimator that approximates the distribution \\(p(\\theta|x)\\) . Parameters: Name Type Description Default num_atoms int Number of atoms to use for classification. 10 training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 calibration_kernel Optional[Callable] A function to calibrate the loss with respect to the simulations x . See Lueckmann, Gon\u00e7alves et al., NeurIPS 2017. None resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False force_first_round_loss bool If True , train with maximum likelihood, i.e., potentially ignoring the correction for using a proposal distribution different from the prior. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False use_combined_loss bool Whether to train the neural net also on prior samples using maximum likelihood in addition to training it on all samples using atomic loss. The extra MLE loss helps prevent density leaking with bounded priors. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Module Density estimator that approximates the distribution \\(p(\\theta|x)\\) . 
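To make the training and `build_posterior` sampling options concrete, here is a hedged single-round sketch (the toy simulator, prior, and `x_o` are assumptions for illustration):

```python
import torch

from sbi.inference import SNPE_C, prepare_for_sbi, simulate_for_sbi
from sbi.utils import BoxUniform


def simulator(theta):
    # Toy simulator used only for illustration.
    return theta + 0.1 * torch.randn_like(theta)


prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
simulator, prior = prepare_for_sbi(simulator, prior)
x_o = torch.zeros(1, 2)  # assumed placeholder observation

inference = SNPE_C(prior=prior)
theta, x = simulate_for_sbi(simulator, prior, num_simulations=500)
inference.append_simulations(theta, x).train(training_batch_size=100)

# Default backend: direct/rejection sampling from the trained flow ...
posterior = inference.build_posterior()
# ... alternatively, e.g., slice-sampling MCMC on the same density estimator.
mcmc_posterior = inference.build_posterior(sample_with="mcmc", mcmc_method="slice_np")

samples = posterior.sample((1000,), x=x_o)
log_prob = posterior.log_prob(samples, x=x_o)
```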
Source code in sbi/inference/snpe/snpe_c.py def train ( self , num_atoms : int = 10 , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , calibration_kernel : Optional [ Callable ] = None , resume_training : bool = False , force_first_round_loss : bool = False , discard_prior_samples : bool = False , use_combined_loss : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> nn . Module : r \"\"\"Return density estimator that approximates the distribution $p(\\theta|x)$. Args: num_atoms: Number of atoms to use for classification. training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. calibration_kernel: A function to calibrate the loss with respect to the simulations `x`. See Lueckmann, Gon\u00e7alves et al., NeurIPS 2017. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. force_first_round_loss: If `True`, train with maximum likelihood, i.e., potentially ignoring the correction for using a proposal distribution different from the prior. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. use_combined_loss: Whether to train the neural net also on prior samples using maximum likelihood in addition to training it on all samples using atomic loss. The extra MLE loss helps prevent density leaking with bounded priors. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Density estimator that approximates the distribution $p(\\theta|x)$. \"\"\" # WARNING: sneaky trick ahead. We proxy the parent's `train` here, # requiring the signature to have `num_atoms`, save it for use below, and # continue. It's sneaky because we are using the object (self) as a namespace # to pass arguments between functions, and that's implicit state management. self . _num_atoms = num_atoms self . _use_combined_loss = use_combined_loss kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" , \"num_atoms\" , \"use_combined_loss\" ), ) self . _round = max ( self . _data_round_index ) if self . _round > 0 : # Set the proposal to the last proposal that was passed by the user. For # atomic SNPE, it does not matter what the proposal is. 
For non-atomic # SNPE, we only use the latest data that was passed, i.e. the one from the # last proposal. proposal = self . _proposal_roundwise [ - 1 ] self . use_non_atomic_loss = ( isinstance ( proposal , DirectPosterior ) and isinstance ( proposal . posterior_estimator . _distribution , mdn ) and isinstance ( self . _neural_net . _distribution , mdn ) and check_dist_class ( self . _prior , class_to_check = ( Uniform , MultivariateNormal ) )[ 0 ] ) algorithm = \"non-atomic\" if self . use_non_atomic_loss else \"atomic\" print ( f \"Using SNPE-C with { algorithm } loss\" ) if self . use_non_atomic_loss : # Take care of z-scoring, pre-compute and store prior terms. self . _set_state_for_mog_proposal () return super () . train ( ** kwargs ) sbi.inference.snle.snle_a.SNLE_A ( LikelihoodEstimator ) \u00b6 __init__ ( self , prior = None , density_estimator = 'maf' , device = 'cpu' , logging_level = 'WARNING' , summary_writer = None , show_progress_bars = True ) special \u00b6 Sequential Neural Likelihood [1]. [1] Sequential Neural Likelihood: Fast Likelihood-free Inference with Autoregressive Flows_, Papamakarios et al., AISTATS 2019, https://arxiv.org/abs/1805.07226 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None density_estimator Union[str, Callable] If it is a string, use a pre-configured network of the provided type (one of nsf, maf, mdn, made). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the density estimator. The density estimator needs to provide the methods .log_prob and .sample() . 'maf' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'WARNING' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/snle/snle_a.py def __init__ ( self , prior : Optional [ Distribution ] = None , density_estimator : Union [ str , Callable ] = \"maf\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"WARNING\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"Sequential Neural Likelihood [1]. [1] Sequential Neural Likelihood: Fast Likelihood-free Inference with Autoregressive Flows_, Papamakarios et al., AISTATS 2019, https://arxiv.org/abs/1805.07226 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. density_estimator: If it is a string, use a pre-configured network of the provided type (one of nsf, maf, mdn, made). Alternatively, a function that builds a custom neural network can be provided. 
The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the density estimator. The density estimator needs to provide the methods `.log_prob` and `.sample()`. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNLE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNLE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description LikelihoodEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snle/snle_a.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"LikelihoodEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNLE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNLE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. 
If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNLE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNLE trains a neural network to approximate the likelihood \\(p(x|\\theta)\\) . The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snle/snle_a.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNLE trains a neural network to approximate the likelihood $p(x|\\theta)$. 
The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNLE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : likelihood_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : likelihood_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = likelihood_estimator_based_potential ( likelihood_estimator = likelihood_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . _posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. 
False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snle/snle_a.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. 
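The following sketch contrasts the three `sample_with` back-ends of `build_posterior` documented above for SNLE. It assumes an already trained `SNLE_A` object named `inference` and an observation `x_o` whose shape matches the simulator output; the concrete `mcmc_parameters` values are illustrative, and the `MCMCPosterior`/`VIPosterior` keyword arguments may vary across sbi versions.

```python
# Sketch only: sampling from a trained SNLE likelihood estimator with different back-ends.
import torch

x_o = torch.zeros(1, 3)  # assumed observation; shape must match the simulator output

# MCMC-based posterior (default): unnormalized log-prob, samples via slice sampling.
mcmc_posterior = inference.build_posterior(
    sample_with="mcmc",
    mcmc_method="slice_np",
    mcmc_parameters={"num_chains": 4, "thin": 10},
)
mcmc_samples = mcmc_posterior.sample((1000,), x=x_o)

# Variational posterior: fit a variational distribution to the potential, then sample cheaply.
vi_posterior = inference.build_posterior(sample_with="vi", vi_method="rKL")
vi_posterior = vi_posterior.set_default_x(x_o)
vi_posterior.train()  # fit the variational distribution before sampling
vi_samples = vi_posterior.sample((1000,))

# Rejection sampling with the prior as proposal.
rejection_posterior = inference.build_posterior(sample_with="rejection")
rejection_samples = rejection_posterior.sample((1000,), x=x_o)
```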
Source code in sbi/inference/snle/snle_a.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) inherited \u00b6 Train the density estimator to learn the distribution \\(p(x|\\theta)\\) . Parameters: Name Type Description Default resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Flow Density estimator that has learned the distribution \\(p(x|\\theta)\\) . Source code in sbi/inference/snle/snle_a.py def train ( self , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> flows . Flow : r \"\"\"Train the density estimator to learn the distribution $p(x|\\theta)$. Args: resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss after the training. 
dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Density estimator that has learned the distribution $p(x|\\theta)$. \"\"\" # Load data from most recent round. self . _round = max ( self . _data_round_index ) # Starting index for the training set (1 = discard round-0 samples). start_idx = int ( discard_prior_samples and self . _round > 0 ) train_loader , val_loader = self . get_dataloaders ( start_idx , training_batch_size , validation_fraction , resume_training , dataloader_kwargs = dataloader_kwargs , ) # First round or if retraining from scratch: # Call the `self._build_neural_net` with the rounds' thetas and xs as # arguments, which will build the neural network # This is passed into NeuralPosterior, to create a neural posterior which # can `sample()` and `log_prob()`. The network is accessible via `.net`. if self . _neural_net is None or retrain_from_scratch : # Get theta,x to initialize NN theta , x , _ = self . get_simulations ( starting_round = start_idx ) # Use only training data for building the neural net (z-scoring transforms) self . _neural_net = self . _build_neural_net ( theta [ self . train_indices ] . to ( \"cpu\" ), x [ self . train_indices ] . to ( \"cpu\" ), ) self . _x_shape = x_shape_from_simulation ( x . to ( \"cpu\" )) del theta , x assert ( len ( self . _x_shape ) < 3 ), \"SNLE cannot handle multi-dimensional simulator output.\" self . _neural_net . to ( self . _device ) if not resume_training : self . optimizer = optim . Adam ( list ( self . _neural_net . parameters ()), lr = learning_rate , ) self . epoch , self . _val_log_prob = 0 , float ( \"-Inf\" ) while self . epoch <= max_num_epochs and not self . _converged ( self . epoch , stop_after_epochs ): # Train for a single epoch. self . _neural_net . train () train_log_probs_sum = 0 for batch in train_loader : self . optimizer . zero_grad () theta_batch , x_batch = ( batch [ 0 ] . to ( self . _device ), batch [ 1 ] . to ( self . _device ), ) # Evaluate on x with theta as context. train_losses = self . _loss ( theta = theta_batch , x = x_batch ) train_loss = torch . mean ( train_losses ) train_log_probs_sum -= train_losses . sum () . item () train_loss . backward () if clip_max_norm is not None : clip_grad_norm_ ( self . _neural_net . parameters (), max_norm = clip_max_norm , ) self . optimizer . step () self . epoch += 1 train_log_prob_average = train_log_probs_sum / ( len ( train_loader ) * train_loader . batch_size # type: ignore ) self . _summary [ \"training_log_probs\" ] . append ( train_log_prob_average ) # Calculate validation performance. self . _neural_net . eval () val_log_prob_sum = 0 with torch . no_grad (): for batch in val_loader : theta_batch , x_batch = ( batch [ 0 ] . to ( self . _device ), batch [ 1 ] . to ( self . _device ), ) # Evaluate on x with theta as context. val_losses = self . _loss ( theta = theta_batch , x = x_batch ) val_log_prob_sum -= val_losses . sum () . item () # Take mean over all validation samples. self . _val_log_prob = val_log_prob_sum / ( len ( val_loader ) * val_loader . batch_size # type: ignore ) # Log validation log prob for every epoch. self . _summary [ \"validation_log_probs\" ] . append ( self . _val_log_prob ) self . _maybe_show_progress ( self . _show_progress_bars , self . epoch ) self . _report_convergence_at_end ( self . epoch , stop_after_epochs , max_num_epochs ) # Update summary. self . _summary [ \"epochs_trained\" ] . append ( self . epoch ) self . 
_summary [ \"best_validation_log_prob\" ] . append ( self . _best_val_log_prob ) # Update TensorBoard and summary dict. self . _summarize ( round_ = self . _round ) # Update description for progress bar. if show_train_summary : print ( self . _describe_round ( self . _round , self . _summary )) # Avoid keeping the gradients in the resulting network, which can # cause memory leakage when benchmarking. self . _neural_net . zero_grad ( set_to_none = True ) return deepcopy ( self . _neural_net ) sbi.inference.snre.snre_a.SNRE_A ( RatioEstimator ) \u00b6 __init__ ( self , prior = None , classifier = 'resnet' , device = 'cpu' , logging_level = 'warning' , summary_writer = None , show_progress_bars = True ) special \u00b6 AALR[1], here known as SNRE_A. [1] Likelihood-free MCMC with Amortized Approximate Likelihood Ratios , Hermans et al., ICML 2020, https://arxiv.org/abs/1903.04057 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None classifier Union[str, Callable] Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the classifier. 'resnet' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'warning' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/snre/snre_a.py def __init__ ( self , prior : Optional [ Distribution ] = None , classifier : Union [ str , Callable ] = \"resnet\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"warning\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"AALR[1], here known as SNRE_A. [1] _Likelihood-free MCMC with Amortized Approximate Likelihood Ratios_, Hermans et al., ICML 2020, https://arxiv.org/abs/1903.04057 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. classifier: Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the classifier. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 
summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNRE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNRE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description RatioEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snre/snre_a.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"RatioEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNRE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNRE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). 
\"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNRE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the normalized posterior, but sampling still requires MCMC (or rejection sampling). Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note that some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snre/snre_a.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. 
The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the **normalized** posterior, but sampling still requires MCMC (or rejection sampling). Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note that some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNRE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : ratio_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : ratio_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = ratio_estimator_based_potential ( ratio_estimator = ratio_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . _posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 
50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snre/snre_a.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. 
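As a compact reference, here is an end-to-end sketch (not from the sbi documentation) of the SNRE_A / AALR workflow built from the methods documented above: `append_simulations`, `train`, and `build_posterior` with MCMC sampling. The toy simulator, prior ranges, and hyperparameter values are illustrative assumptions.

```python
# Sketch only: single-round SNRE_A (AALR) on a toy simulator (all values illustrative).
import torch
from sbi.inference import SNRE_A
from sbi.utils import BoxUniform

prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))

def simulator(theta: torch.Tensor) -> torch.Tensor:
    # Toy simulator: Gaussian noise around the parameters (assumption, not from the docs).
    return theta + 0.1 * torch.randn_like(theta)

theta = prior.sample((2000,))
x = simulator(theta)

inference = SNRE_A(prior=prior, classifier="resnet")
_ = inference.append_simulations(theta, x).train(  # AALR fixes num_atoms=2 internally
    training_batch_size=50,
    stop_after_epochs=20,
)

# The classifier yields an unnormalized posterior; sampling goes through MCMC.
posterior = inference.build_posterior(sample_with="mcmc", mcmc_method="slice_np")
samples = posterior.sample((1000,), x=torch.zeros(1, 2))
```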
Source code in sbi/inference/snre/snre_a.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None , loss_kwargs = {}) \u00b6 Return classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Parameters: Name Type Description Default training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None loss_kwargs Dict[str, Any] Additional or updated kwargs to be passed to the self._loss fn. {} Returns: Type Description Module Classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Source code in sbi/inference/snre/snre_a.py def train ( self , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , loss_kwargs : Dict [ str , Any ] = {}, ) -> nn . 
Module : r \"\"\"Return classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. Args: training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) loss_kwargs: Additional or updated kwargs to be passed to the self._loss fn. Returns: Classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. \"\"\" # AALR is defined for `num_atoms=2`. # Proxy to `super().__call__` to ensure right parameter. kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) return super () . train ( ** kwargs , num_atoms = 2 ) sbi.inference.snre.snre_b.SNRE_B ( RatioEstimator ) \u00b6 __init__ ( self , prior = None , classifier = 'resnet' , device = 'cpu' , logging_level = 'warning' , summary_writer = None , show_progress_bars = True ) special \u00b6 SRE[1], here known as SNRE_B. [1] On Contrastive Learning for Likelihood-free Inference , Durkan et al., ICML 2020, https://arxiv.org/pdf/2002.03712 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None classifier Union[str, Callable] Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the classifier. 'resnet' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'warning' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. 
True Source code in sbi/inference/snre/snre_b.py def __init__ ( self , prior : Optional [ Distribution ] = None , classifier : Union [ str , Callable ] = \"resnet\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"warning\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"SRE[1], here known as SNRE_B. [1] _On Contrastive Learning for Likelihood-free Inference_, Durkan et al., ICML 2020, https://arxiv.org/pdf/2002.03712 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. classifier: Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the classifier. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNRE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNRE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description RatioEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snre/snre_b.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"RatioEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). 
Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNRE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNRE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNRE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the normalized posterior, but sampling still requires MCMC (or rejection sampling). Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note that some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . 
{} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snre/snre_b.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the **normalized** posterior, but sampling still requires MCMC (or rejection sampling). Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note that some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNRE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : ratio_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : ratio_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = ratio_estimator_based_potential ( ratio_estimator = ratio_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . 
_posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snre/snre_b.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . 
tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. Source code in sbi/inference/snre/snre_b.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , num_atoms = 10 , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) \u00b6 Return classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Parameters: Name Type Description Default num_atoms int Number of atoms to use for classification. 10 training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. 
False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Module Classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Source code in sbi/inference/snre/snre_b.py def train ( self , num_atoms : int = 10 , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> nn . Module : r \"\"\"Return classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. Args: num_atoms: Number of atoms to use for classification. training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) return super () . train ( ** kwargs ) sbi.inference.snre.snre_c.SNRE_C ( RatioEstimator ) \u00b6 __init__ ( self , prior = None , classifier = 'resnet' , device = 'cpu' , logging_level = 'warning' , summary_writer = None , show_progress_bars = True ) special \u00b6 NRE-C[1] is a generalization of the non-sequential (amortized) versions of SNRE_A and SNRE_B. We call the algorithm SNRE_C within sbi . NRE-C: (1) like SNRE_B, features a \u201cmulticlass\u201d loss function where several marginally drawn parameter-data pairs are contrasted against a jointly drawn pair. (2) like AALR/NRE_A, i.e., the non-sequential version of SNRE_A, it encourages the approximate ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) , accessed through .potential() within sbi , to be exact at optimum. 
This addresses the issue that SNRE_B estimates this ratio only up to an arbitrary function (normalizing constant) of the data \\(x\\) . Just like for all ratio estimation algorithms, the sequential version of SNRE_C will be estimated only up to a function (normalizing constant) of the data \\(x\\) in rounds after the first. [1] Contrastive Neural Ratio Estimation , Benajmin Kurt Miller, et. al., NeurIPS 2022, https://arxiv.org/abs/2210.06170 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None classifier Union[str, Callable] Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the classifier. 'resnet' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'warning' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/snre/snre_c.py def __init__ ( self , prior : Optional [ Distribution ] = None , classifier : Union [ str , Callable ] = \"resnet\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"warning\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"NRE-C[1] is a generalization of the non-sequential (amortized) versions of SNRE_A and SNRE_B. We call the algorithm SNRE_C within `sbi`. NRE-C: (1) like SNRE_B, features a \"multiclass\" loss function where several marginally drawn parameter-data pairs are contrasted against a jointly drawn pair. (2) like AALR/NRE_A, i.e., the non-sequential version of SNRE_A, it encourages the approximate ratio $p(\\theta,x)/p(\\theta)p(x)$, accessed through `.potential()` within `sbi`, to be exact at optimum. This addresses the issue that SNRE_B estimates this ratio only up to an arbitrary function (normalizing constant) of the data $x$. Just like for all ratio estimation algorithms, the sequential version of SNRE_C will be estimated only up to a function (normalizing constant) of the data $x$ in rounds after the first. [1] _Contrastive Neural Ratio Estimation_, Benajmin Kurt Miller, et. al., NeurIPS 2022, https://arxiv.org/abs/2210.06170 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. classifier: Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. 
The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the classifier. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNRE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNRE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description RatioEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snre/snre_c.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"RatioEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNRE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNRE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. 
If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNRE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the normalized posterior, but sampling still requires MCMC (or rejection sampling). Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note that some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snre/snre_c.py def build_posterior ( self , density_estimator : Optional [ nn . 
Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the **normalized** posterior, but sampling still requires MCMC (or rejection sampling). Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note that some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNRE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : ratio_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : ratio_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = ratio_estimator_based_potential ( ratio_estimator = ratio_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . _posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . 
_posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snre/snre_c.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . 
DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. Source code in sbi/inference/snre/snre_c.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , num_classes = 5 , gamma = 1.0 , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) \u00b6 Return classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Parameters: Name Type Description Default num_classes int Number of theta to classify against, corresponds to \\(K\\) in Contrastive Neural Ratio Estimation . Minimum value is 1. Similar to num_atoms for SNRE_B except SNRE_C has an additional independently drawn sample. The total number of alternative parameters NRE-C \u201csees\u201d is \\(2K-1\\) or 2 * num_classes - 1 divided between two loss terms. 5 gamma float Determines the relative weight of the sum of all \\(K\\) dependently drawn classes against the marginally drawn one. Specifically, \\(p(y=k) :=p_K\\) , \\(p(y=0) := p_0\\) , \\(p_0 = 1 - K p_K\\) , and finally \\(\\gamma := K p_K / p_0\\) . 1.0 training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 exclude_invalid_x Whether to exclude simulation outputs x=NaN or x=\u00b1\u221e during training. Expect errors, silent or explicit, when False . required resume_training bool Can be used in case training time is limited, e.g. on a cluster. 
If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Module Classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Source code in sbi/inference/snre/snre_c.py def train ( self , num_classes : int = 5 , gamma : float = 1.0 , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> nn . Module : r \"\"\"Return classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. Args: num_classes: Number of theta to classify against, corresponds to $K$ in _Contrastive Neural Ratio Estimation_. Minimum value is 1. Similar to `num_atoms` for SNRE_B except SNRE_C has an additional independently drawn sample. The total number of alternative parameters `NRE-C` \"sees\" is $2K-1$ or `2 * num_classes - 1` divided between two loss terms. gamma: Determines the relative weight of the sum of all $K$ dependently drawn classes against the marginally drawn one. Specifically, $p(y=k) :=p_K$, $p(y=0) := p_0$, $p_0 = 1 - K p_K$, and finally $\\gamma := K p_K / p_0$. training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. exclude_invalid_x: Whether to exclude simulation outputs `x=NaN` or `x=\u00b1\u221e` during training. Expect errors, silent or explicit, when `False`. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. 
dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) kwargs [ \"num_atoms\" ] = kwargs . pop ( \"num_classes\" ) + 1 kwargs [ \"loss_kwargs\" ] = { \"gamma\" : kwargs . pop ( \"gamma\" )} return super () . train ( ** kwargs ) sbi.inference.snre.bnre.BNRE ( SNRE_A ) \u00b6 __init__ ( self , prior = None , classifier = 'resnet' , device = 'cpu' , logging_level = 'warning' , summary_writer = None , show_progress_bars = True ) special \u00b6 Balanced neural ratio estimation (BNRE)[1]. BNRE is a variation of NRE aiming to produce more conservative posterior approximations [1] Delaunoy, A., Hermans, J., Rozet, F., Wehenkel, A., & Louppe, G.. Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation. NeurIPS 2022. https://arxiv.org/abs/2208.13624 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None classifier Union[str, Callable] Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations \\((\\theta, x)\\) , which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the classifier. 'resnet' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'warning' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/snre/bnre.py def __init__ ( self , prior : Optional [ Distribution ] = None , classifier : Union [ str , Callable ] = \"resnet\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"warning\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"Balanced neural ratio estimation (BNRE)[1]. BNRE is a variation of NRE aiming to produce more conservative posterior approximations [1] Delaunoy, A., Hermans, J., Rozet, F., Wehenkel, A., & Louppe, G.. Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation. NeurIPS 2022. https://arxiv.org/abs/2208.13624 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. classifier: Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations $(\\theta, x)$, which can thus be used for shape inference and potentially for z-scoring. 
It needs to return a PyTorch `nn.Module` implementing the classifier. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNRE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNRE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description RatioEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snre/bnre.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"RatioEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNRE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNRE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). 
\"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNRE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the normalized posterior, but sampling still requires MCMC (or rejection sampling). Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note that some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snre/bnre.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. 
The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the **normalized** posterior, but sampling still requires MCMC (or rejection sampling). Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note that some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNRE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : ratio_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : ratio_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = ratio_estimator_based_potential ( ratio_estimator = ratio_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . _posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 
50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snre/bnre.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. 
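A minimal usage sketch (not taken from the sbi documentation itself) of the round-wise storage described above. The toy prior, the simulator and the sample size are invented for illustration; only `BNRE`, `append_simulations` and `get_simulations` are used as documented on this page.

import torch
from torch.distributions import MultivariateNormal
from sbi.inference import BNRE

# Hypothetical toy problem: 2-d Gaussian prior, noisy identity simulator.
prior = MultivariateNormal(torch.zeros(2), torch.eye(2))

def simulate(theta):
    return theta + 0.1 * torch.randn_like(theta)

inference = BNRE(prior=prior)

theta = prior.sample((500,))
x = simulate(theta)

# from_round=0 marks these simulations as coming from the prior; data are stored round-wise.
inference.append_simulations(theta, x, from_round=0)

# Retrieve everything stored from round 0 onwards: parameters, outputs, prior masks.
theta_all, x_all, prior_masks = inference.get_simulations(starting_round=0)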
Source code in sbi/inference/snre/bnre.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , regularization_strength = 100.0 , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) \u00b6 Return classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Parameters: Name Type Description Default regularization_strength float The multiplicative coefficient applied to the balancing regularizer ( \\(\\lambda\\) ). 100.0 training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 exclude_invalid_x Whether to exclude simulation outputs x=NaN or x=\u00b1\u221e during training. Expect errors, silent or explicit, when False . required resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Module Classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . 
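Continuing the sketch above, a hedged example of the `train` / `build_posterior` / `sample` sequence. The regularization strength, the number of epochs, the MCMC settings and the observation `x_o` are illustrative assumptions, not recommended values.

# Train the balanced classifier; regularization_strength is the weight lambda
# of the balancing regularizer documented above (illustrative value).
classifier = inference.train(regularization_strength=100.0, max_num_epochs=100)

# Wrap the trained ratio estimator into a posterior that samples with MCMC.
posterior = inference.build_posterior(
    sample_with="mcmc",
    mcmc_method="slice_np",
    mcmc_parameters={"num_chains": 4, "thin": 5},  # assumed MCMCPosterior kwargs
)

x_o = torch.tensor([[0.8, -0.2]])            # hypothetical observation
samples = posterior.sample((1_000,), x=x_o)  # draws from p(theta | x_o)
log_potential = posterior.potential(samples, x=x_o)  # unnormalized log-posterior

Passing `sample_with="vi"` or `sample_with="rejection"` instead would wrap the same ratio estimator in a `VIPosterior` or `RejectionPosterior`, configured via `vi_parameters` or `rejection_sampling_parameters` respectively.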
Source code in sbi/inference/snre/bnre.py def train ( self , regularization_strength : float = 100.0 , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> nn . Module : r \"\"\"Return classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. Args: regularization_strength: The multiplicative coefficient applied to the balancing regularizer ($\\lambda$). training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. exclude_invalid_x: Whether to exclude simulation outputs `x=NaN` or `x=\u00b1\u221e` during training. Expect errors, silent or explicit, when `False`. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) kwargs [ \"loss_kwargs\" ] = { \"regularization_strength\" : kwargs . pop ( \"regularization_strength\" ) } return super () . train ( ** kwargs ) sbi.inference.abc.mcabc.MCABC ( ABCBASE ) \u00b6 __call__ ( self , x_o , num_simulations , eps = None , quantile = None , lra = False , sass = False , sass_fraction = 0.25 , sass_expansion_degree = 1 , kde = False , kde_kwargs = {}, return_summary = False ) special \u00b6 Run MCABC and return accepted parameters or KDE object fitted on them. Parameters: Name Type Description Default x_o Union[torch.Tensor, numpy.ndarray] Observed data. required num_simulations int Number of simulations to run. required eps Optional[float] Acceptance threshold \\(\\epsilon\\) for distance between observed and simulated data. None quantile Optional[float] Upper quantile of smallest distances for which the corresponding parameters are returned, e.g, q=0.01 will return the top 1%. Exactly one of quantile or eps have to be passed. None lra bool Whether to run linear regression adjustment as in Beaumont et al. 
2002 False sass bool Whether to determine semi-automatic summary statistics as in Fearnhead & Prangle 2012. False sass_fraction float Fraction of simulation budget used for the initial sass run. 0.25 sass_expansion_degree int Degree of the polynomial feature expansion for the sass regression, default 1 - no expansion. 1 kde bool Whether to run KDE on the accepted parameters to return a KDE object from which one can sample. False kde_kwargs Dict[str, Any] kwargs for performing KDE: \u2018bandwidth=\u2019; either a float, or a string naming a bandwidth heuristics, e.g., \u2018cv\u2019 (cross validation), \u2018silvermann\u2019 or \u2018scott\u2019, default \u2018cv\u2019. \u2018transform\u2019: transform applied to the parameters before doing KDE. \u2018sample_weights\u2019: weights associated with samples. See \u2018get_kde\u2019 for more details {} return_summary bool Whether to return the distances and data corresponding to the accepted parameters. False Returns: Type Description theta (if kde False) accepted parameters kde (if kde True): KDE object based on accepted parameters from which one can .sample() and .log_prob(). summary (if summary True): dictionary containing the accepted paramters (if kde True), distances and simulated data x. Source code in sbi/inference/abc/mcabc.py def __call__ ( self , x_o : Union [ Tensor , ndarray ], num_simulations : int , eps : Optional [ float ] = None , quantile : Optional [ float ] = None , lra : bool = False , sass : bool = False , sass_fraction : float = 0.25 , sass_expansion_degree : int = 1 , kde : bool = False , kde_kwargs : Dict [ str , Any ] = {}, return_summary : bool = False , ) -> Union [ Tuple [ Tensor , dict ], Tuple [ KDEWrapper , dict ], Tensor , KDEWrapper ]: r \"\"\"Run MCABC and return accepted parameters or KDE object fitted on them. Args: x_o: Observed data. num_simulations: Number of simulations to run. eps: Acceptance threshold $\\epsilon$ for distance between observed and simulated data. quantile: Upper quantile of smallest distances for which the corresponding parameters are returned, e.g, q=0.01 will return the top 1%. Exactly one of quantile or `eps` have to be passed. lra: Whether to run linear regression adjustment as in Beaumont et al. 2002 sass: Whether to determine semi-automatic summary statistics as in Fearnhead & Prangle 2012. sass_fraction: Fraction of simulation budget used for the initial sass run. sass_expansion_degree: Degree of the polynomial feature expansion for the sass regression, default 1 - no expansion. kde: Whether to run KDE on the accepted parameters to return a KDE object from which one can sample. kde_kwargs: kwargs for performing KDE: 'bandwidth='; either a float, or a string naming a bandwidth heuristics, e.g., 'cv' (cross validation), 'silvermann' or 'scott', default 'cv'. 'transform': transform applied to the parameters before doing KDE. 'sample_weights': weights associated with samples. See 'get_kde' for more details return_summary: Whether to return the distances and data corresponding to the accepted parameters. Returns: theta (if kde False): accepted parameters kde (if kde True): KDE object based on accepted parameters from which one can .sample() and .log_prob(). summary (if summary True): dictionary containing the accepted paramters (if kde True), distances and simulated data x. \"\"\" # Exactly one of eps or quantile need to be passed. 
assert ( eps is not None ) ^ ( quantile is not None ), \"Eps or quantile must be passed, but not both.\" # Run SASS and change the simulator and x_o accordingly. if sass : num_pilot_simulations = int ( sass_fraction * num_simulations ) self . logger . info ( f \"Running SASS with { num_pilot_simulations } pilot samples.\" ) num_simulations -= num_pilot_simulations pilot_theta = self . prior . sample (( num_pilot_simulations ,)) pilot_x = self . _batched_simulator ( pilot_theta ) sass_transform = self . get_sass_transform ( pilot_theta , pilot_x , sass_expansion_degree ) simulator = lambda theta : sass_transform ( self . _batched_simulator ( theta )) x_o = sass_transform ( x_o ) else : simulator = self . _batched_simulator # Simulate and calculate distances. theta = self . prior . sample (( num_simulations ,)) x = simulator ( theta ) # Infer shape of x to test and set x_o. self . x_shape = x [ 0 ] . unsqueeze ( 0 ) . shape self . x_o = process_x ( x_o , self . x_shape ) distances = self . distance ( self . x_o , x ) # Select based on acceptance threshold epsilon. if eps is not None : is_accepted = distances < eps num_accepted = is_accepted . sum () . item () assert num_accepted > 0 , f \"No parameters accepted, eps= { eps } too small\" theta_accepted = theta [ is_accepted ] distances_accepted = distances [ is_accepted ] x_accepted = x [ is_accepted ] # Select based on quantile on sorted distances. elif quantile is not None : num_top_samples = int ( num_simulations * quantile ) sort_idx = torch . argsort ( distances ) theta_accepted = theta [ sort_idx ][: num_top_samples ] distances_accepted = distances [ sort_idx ][: num_top_samples ] x_accepted = x [ sort_idx ][: num_top_samples ] else : raise ValueError ( \"One of epsilon or quantile has to be passed.\" ) # Maybe adjust theta with LRA. if lra : self . logger . info ( \"Running Linear regression adjustment.\" ) final_theta = self . run_lra ( theta_accepted , x_accepted , observation = self . x_o ) else : final_theta = theta_accepted if kde : self . logger . info ( f \"\"\"KDE on { final_theta . shape [ 0 ] } samples with bandwidth option { kde_kwargs [ \"bandwidth\" ] if \"bandwidth\" in kde_kwargs else \"cv\" } . Beware that KDE can give unreliable results when used with too few samples and in high dimensions.\"\"\" ) kde_dist = get_kde ( final_theta , ** kde_kwargs ) if return_summary : return ( kde_dist , dict ( theta = final_theta , distances = distances_accepted , x = x_accepted ), ) else : return kde_dist elif return_summary : return final_theta , dict ( distances = distances_accepted , x = x_accepted ) else : return final_theta __init__ ( self , simulator , prior , distance = 'l2' , num_workers = 1 , simulation_batch_size = 1 , show_progress_bars = True ) special \u00b6 Monte-Carlo Approximate Bayesian Computation (Rejection ABC) [1]. [1] Pritchard, J. K., Seielstad, M. T., Perez-Lezaun, A., & Feldman, M. W. (1999). Population growth of human Y chromosomes: a study of Y chromosome microsatellites. Molecular biology and evolution, 16(12), 1791-1798. Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\mathrm{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required prior A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. 
Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. required distance Union[str, Callable] Distance function to compare observed and simulated data. Can be a custom function or one of l1 , l2 , mse . 'l2' num_workers int Number of parallel workers to use for simulations. 1 simulation_batch_size int Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 1 show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/abc/mcabc.py def __init__ ( self , simulator : Callable , prior , distance : Union [ str , Callable ] = \"l2\" , num_workers : int = 1 , simulation_batch_size : int = 1 , show_progress_bars : bool = True , ): r \"\"\"Monte-Carlo Approximate Bayesian Computation (Rejection ABC) [1]. [1] Pritchard, J. K., Seielstad, M. T., Perez-Lezaun, A., & Feldman, M. W. (1999). Population growth of human Y chromosomes: a study of Y chromosome microsatellites. Molecular biology and evolution, 16(12), 1791-1798. Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\mathrm{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. distance: Distance function to compare observed and simulated data. Can be a custom function or one of `l1`, `l2`, `mse`. num_workers: Number of parallel workers to use for simulations. simulation_batch_size: Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" super () . __init__ ( simulator = simulator , prior = prior , distance = distance , num_workers = num_workers , simulation_batch_size = simulation_batch_size , show_progress_bars = show_progress_bars , ) get_distance_function ( distance_type = 'l2' ) inherited \u00b6 Return distance function for given distance type. Parameters: Name Type Description Default distance_type Union[str, Callable] string indicating the distance type, e.g., \u2018l2\u2019, \u2018l1\u2019, \u2018mse\u2019. Note that the returned distance function averages over the last dimension, e.g., over the summary statistics. 'l2' Returns: Type Description distance_fun distance functions built from passe string. Returns distance_type is callable. Source code in sbi/inference/abc/mcabc.py @staticmethod def get_distance_function ( distance_type : Union [ str , Callable ] = \"l2\" ) -> Callable : \"\"\"Return distance function for given distance type. Args: distance_type: string indicating the distance type, e.g., 'l2', 'l1', 'mse'. Note that the returned distance function averages over the last dimension, e.g., over the summary statistics. Returns: distance_fun: distance functions built from passe string. Returns distance_type is callable. 
\"\"\" if isinstance ( distance_type , Callable ): return distance_type distances = [ \"l1\" , \"l2\" , \"mse\" ] assert ( distance_type in distances ), f \" { distance_type } must be one of { distances } .\" if distance_type == \"mse\" : distance = lambda xo , x : torch . mean (( xo - x ) ** 2 , dim =- 1 ) elif distance_type == \"l2\" : distance = lambda xo , x : torch . norm (( xo - x ), dim =- 1 ) elif distance_type == \"l1\" : distance = lambda xo , x : torch . mean ( abs ( xo - x ), dim =- 1 ) else : raise ValueError ( r \"Distance {distance_type} not supported.\" ) def distance_fun ( observed_data : Tensor , simulated_data : Tensor ) -> Tensor : \"\"\"Return distance over batch dimension. Args: observed_data: Observed data, could be 1D. simulated_data: Batch of simulated data, has batch dimension. Returns: Torch tensor with batch of distances. \"\"\" assert simulated_data . ndim == 2 , \"simulated data needs batch dimension\" return distance ( observed_data , simulated_data ) return distance_fun get_sass_transform ( theta , x , expansion_degree = 1 , sample_weight = None ) inherited \u00b6 Return semi-automatic summary statitics function. Running weighted linear regressin as in Fearnhead & Prandle 2012: https://arxiv.org/abs/1004.1112 Following implementation in https://abcpy.readthedocs.io/en/latest/_modules/abcpy/statistics.html#Identity and https://pythonhosted.org/abcpy/_modules/abcpy/summaryselections.html#Semiautomatic Source code in sbi/inference/abc/mcabc.py @staticmethod def get_sass_transform ( theta : torch . Tensor , x : torch . Tensor , expansion_degree : int = 1 , sample_weight = None , ) -> Callable : \"\"\"Return semi-automatic summary statitics function. Running weighted linear regressin as in Fearnhead & Prandle 2012: https://arxiv.org/abs/1004.1112 Following implementation in https://abcpy.readthedocs.io/en/latest/_modules/abcpy/statistics.html#Identity and https://pythonhosted.org/abcpy/_modules/abcpy/summaryselections.html#Semiautomatic \"\"\" expansion = PolynomialFeatures ( degree = expansion_degree , include_bias = False ) # Transform x, remove intercept. x_expanded = expansion . fit_transform ( x ) sumstats_map = np . zeros (( x_expanded . shape [ 1 ], theta . shape [ 1 ])) for parameter_idx in range ( theta . shape [ 1 ]): regression_model = LinearRegression ( fit_intercept = True ) regression_model . fit ( X = x_expanded , y = theta [:, parameter_idx ], sample_weight = sample_weight ) sumstats_map [:, parameter_idx ] = regression_model . coef_ sumstats_map = torch . tensor ( sumstats_map , dtype = torch . float32 ) def sumstats_transform ( x ): x_expanded = torch . tensor ( expansion . fit_transform ( x ), dtype = torch . float32 ) return x_expanded . mm ( sumstats_map ) return sumstats_transform run_lra ( theta , x , observation , sample_weight = None ) inherited \u00b6 Return parameters adjusted with linear regression adjustment. Implementation as in Beaumont et al. 2002: https://arxiv.org/abs/1707.01254 Source code in sbi/inference/abc/mcabc.py @staticmethod def run_lra ( theta : torch . Tensor , x : torch . Tensor , observation : torch . Tensor , sample_weight = None , ) -> torch . Tensor : \"\"\"Return parameters adjusted with linear regression adjustment. Implementation as in Beaumont et al. 2002: https://arxiv.org/abs/1707.01254 \"\"\" theta_adjusted = theta for parameter_idx in range ( theta . shape [ 1 ]): regression_model = LinearRegression ( fit_intercept = True ) regression_model . 
fit ( X = x , y = theta [:, parameter_idx ], sample_weight = sample_weight , ) theta_adjusted [:, parameter_idx ] += regression_model . predict ( observation . reshape ( 1 , - 1 ) ) theta_adjusted [:, parameter_idx ] -= regression_model . predict ( x ) return theta_adjusted sbi.inference.abc.smcabc.SMCABC ( ABCBASE ) \u00b6 __call__ ( self , x_o , num_particles , num_initial_pop , num_simulations , epsilon_decay , distance_based_decay = False , ess_min = None , kernel_variance_scale = 1.0 , use_last_pop_samples = True , return_summary = False , kde = False , kde_kwargs = {}, kde_sample_weights = False , lra = False , lra_with_weights = False , sass = False , sass_fraction = 0.25 , sass_expansion_degree = 1 ) special \u00b6 Run SMCABC and return accepted parameters or KDE object fitted on them. Parameters: Name Type Description Default x_o Union[torch.Tensor, numpy.ndarray] Observed data. required num_particles int Number of particles in each population. required num_initial_pop int Number of simulations used for initial population. required num_simulations int Total number of possible simulations. required epsilon_decay float Factor with which the acceptance threshold \\(\\epsilon\\) decays. required distance_based_decay bool Whether the \\(\\epsilon\\) decay is constant over populations or calculated from the previous populations distribution of distances. False ess_min Optional[float] Threshold of effective sampling size for resampling weights. Not used when None (default). None kernel_variance_scale float Factor for scaling the perturbation kernel variance. 1.0 use_last_pop_samples bool Whether to fill up the current population with samples from the previous population when the budget is used up. If False, the current population is discarded and the previous population is returned. True lra bool Whether to run linear regression adjustment as in Beaumont et al. 2002 False lra_with_weights bool Whether to run lra as weighted linear regression with SMC weights False sass bool Whether to determine semi-automatic summary statistics as in Fearnhead & Prangle 2012. False sass_fraction float Fraction of simulation budget used for the initial sass run. 0.25 sass_expansion_degree int Degree of the polynomial feature expansion for the sass regression, default 1 - no expansion. 1 kde bool Whether to run KDE on the accepted parameters to return a KDE object from which one can sample. False kde_kwargs Dict[str, Any] kwargs for performing KDE: \u2018bandwidth=\u2019; either a float, or a string naming a bandwidth heuristics, e.g., \u2018cv\u2019 (cross validation), \u2018silvermann\u2019 or \u2018scott\u2019, default \u2018cv\u2019. \u2018transform\u2019: transform applied to the parameters before doing KDE. \u2018sample_weights\u2019: weights associated with samples. See \u2018get_kde\u2019 for more details {} kde_sample_weights bool Whether perform weighted KDE with SMC weights or on raw particles. False return_summary bool Whether to return a dictionary with all accepted particles, weights, etc. at the end. False Returns: Type Description theta (if kde False) accepted parameters of the last population. kde (if kde True): KDE object fitted on accepted parameters, from which one can .sample() and .log_prob(). summary (if return_summary True): dictionary containing the accepted paramters (if kde True), distances and simulated data x of all populations. 
Source code in sbi/inference/abc/smcabc.py def __call__ ( self , x_o : Union [ Tensor , ndarray ], num_particles : int , num_initial_pop : int , num_simulations : int , epsilon_decay : float , distance_based_decay : bool = False , ess_min : Optional [ float ] = None , kernel_variance_scale : float = 1.0 , use_last_pop_samples : bool = True , return_summary : bool = False , kde : bool = False , kde_kwargs : Dict [ str , Any ] = {}, kde_sample_weights : bool = False , lra : bool = False , lra_with_weights : bool = False , sass : bool = False , sass_fraction : float = 0.25 , sass_expansion_degree : int = 1 , ) -> Union [ Tensor , KDEWrapper , Tuple [ Tensor , dict ], Tuple [ KDEWrapper , dict ]]: r \"\"\"Run SMCABC and return accepted parameters or KDE object fitted on them. Args: x_o: Observed data. num_particles: Number of particles in each population. num_initial_pop: Number of simulations used for initial population. num_simulations: Total number of possible simulations. epsilon_decay: Factor with which the acceptance threshold $\\epsilon$ decays. distance_based_decay: Whether the $\\epsilon$ decay is constant over populations or calculated from the previous populations distribution of distances. ess_min: Threshold of effective sampling size for resampling weights. Not used when None (default). kernel_variance_scale: Factor for scaling the perturbation kernel variance. use_last_pop_samples: Whether to fill up the current population with samples from the previous population when the budget is used up. If False, the current population is discarded and the previous population is returned. lra: Whether to run linear regression adjustment as in Beaumont et al. 2002 lra_with_weights: Whether to run lra as weighted linear regression with SMC weights sass: Whether to determine semi-automatic summary statistics as in Fearnhead & Prangle 2012. sass_fraction: Fraction of simulation budget used for the initial sass run. sass_expansion_degree: Degree of the polynomial feature expansion for the sass regression, default 1 - no expansion. kde: Whether to run KDE on the accepted parameters to return a KDE object from which one can sample. kde_kwargs: kwargs for performing KDE: 'bandwidth='; either a float, or a string naming a bandwidth heuristics, e.g., 'cv' (cross validation), 'silvermann' or 'scott', default 'cv'. 'transform': transform applied to the parameters before doing KDE. 'sample_weights': weights associated with samples. See 'get_kde' for more details kde_sample_weights: Whether perform weighted KDE with SMC weights or on raw particles. return_summary: Whether to return a dictionary with all accepted particles, weights, etc. at the end. Returns: theta (if kde False): accepted parameters of the last population. kde (if kde True): KDE object fitted on accepted parameters, from which one can .sample() and .log_prob(). summary (if return_summary True): dictionary containing the accepted paramters (if kde True), distances and simulated data x of all populations. \"\"\" pop_idx = 0 self . num_simulations = num_simulations # Pilot run for SASS. if sass : num_pilot_simulations = int ( sass_fraction * num_simulations ) self . logger . info ( f \"Running SASS with { num_pilot_simulations } pilot samples.\" ) sass_transform = self . run_sass_set_xo ( num_particles , num_pilot_simulations , x_o , lra , sass_expansion_degree ) # Udpate simulator and xo x_o = sass_transform ( self . x_o ) def sass_simulator ( theta ): self . simulation_counter += theta . shape [ 0 ] return sass_transform ( self . 
_batched_simulator ( theta )) self . _simulate_with_budget = sass_simulator # run initial population particles , epsilon , distances , x = self . _set_xo_and_sample_initial_population ( x_o , num_particles , num_initial_pop ) log_weights = torch . log ( 1 / num_particles * ones ( num_particles )) self . logger . info ( ( f \"population= { pop_idx } , eps= { epsilon } , ess= { 1.0 } , \" f \"num_sims= { num_initial_pop } \" ) ) all_particles = [ particles ] all_log_weights = [ log_weights ] all_distances = [ distances ] all_epsilons = [ epsilon ] all_x = [ x ] while self . simulation_counter < self . num_simulations : pop_idx += 1 # Decay based on quantile of distances from previous pop. if distance_based_decay : epsilon = self . _get_next_epsilon ( all_distances [ pop_idx - 1 ], epsilon_decay ) # Constant decay. else : epsilon *= epsilon_decay # Get kernel variance from previous pop. self . kernel_variance = self . get_kernel_variance ( all_particles [ pop_idx - 1 ], torch . exp ( all_log_weights [ pop_idx - 1 ]), samples_per_dim = 500 , kernel_variance_scale = kernel_variance_scale , ) particles , log_weights , distances , x = self . _sample_next_population ( particles = all_particles [ pop_idx - 1 ], log_weights = all_log_weights [ pop_idx - 1 ], distances = all_distances [ pop_idx - 1 ], epsilon = epsilon , x = all_x [ pop_idx - 1 ], use_last_pop_samples = use_last_pop_samples , ) # Resample population if effective sampling size is too small. if ess_min is not None : particles , log_weights = self . resample_if_ess_too_small ( particles , log_weights , ess_min , pop_idx ) self . logger . info ( ( f \"population= { pop_idx } done: eps= { epsilon : .6f } ,\" f \" num_sims= { self . simulation_counter } .\" ) ) # collect results all_particles . append ( particles ) all_log_weights . append ( log_weights ) all_distances . append ( distances ) all_epsilons . append ( epsilon ) all_x . append ( x ) # Maybe run LRA and adjust weights. if lra : self . logger . info ( \"Running Linear regression adjustment.\" ) adjusted_particles , adjusted_weights = self . run_lra_update_weights ( particles = all_particles [ - 1 ], xs = all_x [ - 1 ], observation = process_x ( x_o ), log_weights = all_log_weights [ - 1 ], lra_with_weights = lra_with_weights , ) final_particles = adjusted_particles else : final_particles = all_particles [ - 1 ] if kde : self . logger . info ( f \"\"\"KDE on { final_particles . shape [ 0 ] } samples with bandwidth option { kde_kwargs [ \"bandwidth\" ] if \"bandwidth\" in kde_kwargs else \"cv\" } . Beware that KDE can give unreliable results when used with too few samples and in high dimensions.\"\"\" ) # Maybe get particles weights from last population for weighted KDE. if kde_sample_weights : kde_kwargs [ \"sample_weights\" ] = all_log_weights [ - 1 ] . exp () kde_dist = get_kde ( final_particles , ** kde_kwargs ) if return_summary : return ( kde_dist , dict ( particles = all_particles , weights = all_log_weights , epsilons = all_epsilons , distances = all_distances , xs = all_x , ), ) else : return kde_dist if return_summary : return ( final_particles , dict ( particles = all_particles , weights = all_log_weights , epsilons = all_epsilons , distances = all_distances , xs = all_x , ), ) else : return final_particles __init__ ( self , simulator , prior , distance = 'l2' , num_workers = 1 , simulation_batch_size = 1 , show_progress_bars = True , kernel = 'gaussian' , algorithm_variant = 'C' ) special \u00b6 Sequential Monte Carlo Approximate Bayesian Computation. 
We distinguish between three different SMC methods here: - A: Toni et al. 2010 (Phd Thesis) - B: Sisson et al. 2007 (with correction from 2009) - C: Beaumont et al. 2009 In Toni et al. 2010 we find an overview of the differences on page 34: - B: same as A except for resampling of weights if the effective sampling size is too small. - C: same as A except for calculation of the covariance of the perturbation kernel: the kernel covariance is a scaled version of the covariance of the previous population. Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\mathrm{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required prior Distribution A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. required distance Union[str, Callable] Distance function to compare observed and simulated data. Can be a custom function or one of l1 , l2 , mse . 'l2' num_workers int Number of parallel workers to use for simulations. 1 simulation_batch_size int Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 1 show_progress_bars bool Whether to show a progressbar during simulation and sampling. True kernel Optional[str] Perturbation kernel. 'gaussian' algorithm_variant str Indicating the choice of algorithm variant, A, B, or C. 'C' Source code in sbi/inference/abc/smcabc.py def __init__ ( self , simulator : Callable , prior : Distribution , distance : Union [ str , Callable ] = \"l2\" , num_workers : int = 1 , simulation_batch_size : int = 1 , show_progress_bars : bool = True , kernel : Optional [ str ] = \"gaussian\" , algorithm_variant : str = \"C\" , ): r \"\"\"Sequential Monte Carlo Approximate Bayesian Computation. We distinguish between three different SMC methods here: - A: Toni et al. 2010 (Phd Thesis) - B: Sisson et al. 2007 (with correction from 2009) - C: Beaumont et al. 2009 In Toni et al. 2010 we find an overview of the differences on page 34: - B: same as A except for resampling of weights if the effective sampling size is too small. - C: same as A except for calculation of the covariance of the perturbation kernel: the kernel covariance is a scaled version of the covariance of the previous population. Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\mathrm{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. distance: Distance function to compare observed and simulated data. Can be a custom function or one of `l1`, `l2`, `mse`. num_workers: Number of parallel workers to use for simulations. simulation_batch_size: Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 
show_progress_bars: Whether to show a progressbar during simulation and sampling. kernel: Perturbation kernel. algorithm_variant: Indicating the choice of algorithm variant, A, B, or C. \"\"\" super () . __init__ ( simulator = simulator , prior = prior , distance = distance , num_workers = num_workers , simulation_batch_size = simulation_batch_size , show_progress_bars = show_progress_bars , ) kernels = ( \"gaussian\" , \"uniform\" ) assert ( kernel in kernels ), f \"Kernel ' { kernel } ' not supported. Choose one from { kernels } .\" self . kernel = kernel algorithm_variants = ( \"A\" , \"B\" , \"C\" ) assert algorithm_variant in algorithm_variants , ( f \"SMCABC variant ' { algorithm_variant } ' not supported, choose one from\" \" {algorithm_variants} .\" ) self . algorithm_variant = algorithm_variant self . distance_to_x0 = None self . simulation_counter = 0 self . num_simulations = 0 # Define simulator that keeps track of budget. def simulate_with_budget ( theta ): self . simulation_counter += theta . shape [ 0 ] return self . _batched_simulator ( theta ) self . _simulate_with_budget = simulate_with_budget get_distance_function ( distance_type = 'l2' ) inherited \u00b6 Return distance function for given distance type. Parameters: Name Type Description Default distance_type Union[str, Callable] string indicating the distance type, e.g., \u2018l2\u2019, \u2018l1\u2019, \u2018mse\u2019. Note that the returned distance function averages over the last dimension, e.g., over the summary statistics. 'l2' Returns: Type Description distance_fun distance functions built from passe string. Returns distance_type is callable. Source code in sbi/inference/abc/smcabc.py @staticmethod def get_distance_function ( distance_type : Union [ str , Callable ] = \"l2\" ) -> Callable : \"\"\"Return distance function for given distance type. Args: distance_type: string indicating the distance type, e.g., 'l2', 'l1', 'mse'. Note that the returned distance function averages over the last dimension, e.g., over the summary statistics. Returns: distance_fun: distance functions built from passe string. Returns distance_type is callable. \"\"\" if isinstance ( distance_type , Callable ): return distance_type distances = [ \"l1\" , \"l2\" , \"mse\" ] assert ( distance_type in distances ), f \" { distance_type } must be one of { distances } .\" if distance_type == \"mse\" : distance = lambda xo , x : torch . mean (( xo - x ) ** 2 , dim =- 1 ) elif distance_type == \"l2\" : distance = lambda xo , x : torch . norm (( xo - x ), dim =- 1 ) elif distance_type == \"l1\" : distance = lambda xo , x : torch . mean ( abs ( xo - x ), dim =- 1 ) else : raise ValueError ( r \"Distance {distance_type} not supported.\" ) def distance_fun ( observed_data : Tensor , simulated_data : Tensor ) -> Tensor : \"\"\"Return distance over batch dimension. Args: observed_data: Observed data, could be 1D. simulated_data: Batch of simulated data, has batch dimension. Returns: Torch tensor with batch of distances. \"\"\" assert simulated_data . ndim == 2 , \"simulated data needs batch dimension\" return distance ( observed_data , simulated_data ) return distance_fun get_new_kernel ( self , thetas ) \u00b6 Return new kernel distribution for a given set of paramters. Source code in sbi/inference/abc/smcabc.py def get_new_kernel ( self , thetas : Tensor ) -> Distribution : \"\"\"Return new kernel distribution for a given set of paramters.\"\"\" if self . kernel == \"gaussian\" : assert self . kernel_variance . 
ndim == 2 return MultivariateNormal ( loc = thetas , covariance_matrix = self . kernel_variance ) elif self . kernel == \"uniform\" : low = thetas - self . kernel_variance high = thetas + self . kernel_variance # Move batch shape to event shape to get Uniform that is multivariate in # parameter dimension. return Uniform ( low = low , high = high ) . to_event ( 1 ) else : raise ValueError ( f \"Kernel, ' { self . kernel } ' not supported.\" ) get_particle_ranges ( self , particles , weights , samples_per_dim = 100 ) \u00b6 Return range of particles in each parameter dimension. Source code in sbi/inference/abc/smcabc.py def get_particle_ranges ( self , particles : Tensor , weights : Tensor , samples_per_dim : int = 100 ) -> Tensor : \"\"\"Return range of particles in each parameter dimension.\"\"\" # get weighted samples samples = self . sample_from_population_with_weights ( particles , weights , num_samples = samples_per_dim * particles . shape [ 1 ], ) # Variance spans the range of particles for every dimension. particle_ranges = samples . max ( 0 ) . values - samples . min ( 0 ) . values assert particle_ranges . ndim < 2 return particle_ranges get_sass_transform ( theta , x , expansion_degree = 1 , sample_weight = None ) inherited \u00b6 Return semi-automatic summary statitics function. Running weighted linear regressin as in Fearnhead & Prandle 2012: https://arxiv.org/abs/1004.1112 Following implementation in https://abcpy.readthedocs.io/en/latest/_modules/abcpy/statistics.html#Identity and https://pythonhosted.org/abcpy/_modules/abcpy/summaryselections.html#Semiautomatic Source code in sbi/inference/abc/smcabc.py @staticmethod def get_sass_transform ( theta : torch . Tensor , x : torch . Tensor , expansion_degree : int = 1 , sample_weight = None , ) -> Callable : \"\"\"Return semi-automatic summary statitics function. Running weighted linear regressin as in Fearnhead & Prandle 2012: https://arxiv.org/abs/1004.1112 Following implementation in https://abcpy.readthedocs.io/en/latest/_modules/abcpy/statistics.html#Identity and https://pythonhosted.org/abcpy/_modules/abcpy/summaryselections.html#Semiautomatic \"\"\" expansion = PolynomialFeatures ( degree = expansion_degree , include_bias = False ) # Transform x, remove intercept. x_expanded = expansion . fit_transform ( x ) sumstats_map = np . zeros (( x_expanded . shape [ 1 ], theta . shape [ 1 ])) for parameter_idx in range ( theta . shape [ 1 ]): regression_model = LinearRegression ( fit_intercept = True ) regression_model . fit ( X = x_expanded , y = theta [:, parameter_idx ], sample_weight = sample_weight ) sumstats_map [:, parameter_idx ] = regression_model . coef_ sumstats_map = torch . tensor ( sumstats_map , dtype = torch . float32 ) def sumstats_transform ( x ): x_expanded = torch . tensor ( expansion . fit_transform ( x ), dtype = torch . float32 ) return x_expanded . mm ( sumstats_map ) return sumstats_transform resample_if_ess_too_small ( self , particles , log_weights , ess_min , pop_idx ) \u00b6 Return resampled particles and uniform weights if effectice sampling size is too small. Source code in sbi/inference/abc/smcabc.py def resample_if_ess_too_small ( self , particles : Tensor , log_weights : Tensor , ess_min : float , pop_idx : int , ) -> Tuple [ Tensor , Tensor ]: \"\"\"Return resampled particles and uniform weights if effectice sampling size is too small. \"\"\" num_particles = particles . shape [ 0 ] ess = ( 1 / torch . sum ( torch . 
exp ( 2.0 * log_weights ), dim = 0 )) / num_particles # Resampling of weights for low ESS only for Sisson et al. 2007. if ess < ess_min : self . logger . info ( f \"ESS= { ess : .2f } too low, resampling pop { pop_idx } ...\" ) # First resample, then set to uniform weights as in Sisson et al. 2007. particles = self . sample_from_population_with_weights ( particles , torch . exp ( log_weights ), num_samples = num_particles ) log_weights = torch . log ( 1 / num_particles * ones ( num_particles )) return particles , log_weights run_lra ( theta , x , observation , sample_weight = None ) inherited \u00b6 Return parameters adjusted with linear regression adjustment. Implementation as in Beaumont et al. 2002: https://arxiv.org/abs/1707.01254 Source code in sbi/inference/abc/smcabc.py @staticmethod def run_lra ( theta : torch . Tensor , x : torch . Tensor , observation : torch . Tensor , sample_weight = None , ) -> torch . Tensor : \"\"\"Return parameters adjusted with linear regression adjustment. Implementation as in Beaumont et al. 2002: https://arxiv.org/abs/1707.01254 \"\"\" theta_adjusted = theta for parameter_idx in range ( theta . shape [ 1 ]): regression_model = LinearRegression ( fit_intercept = True ) regression_model . fit ( X = x , y = theta [:, parameter_idx ], sample_weight = sample_weight , ) theta_adjusted [:, parameter_idx ] += regression_model . predict ( observation . reshape ( 1 , - 1 ) ) theta_adjusted [:, parameter_idx ] -= regression_model . predict ( x ) return theta_adjusted run_lra_update_weights ( self , particles , xs , observation , log_weights , lra_with_weights ) \u00b6 Return particles and weights adjusted with LRA. Runs (weighted) linear regression from xs onto particles to adjust the particles. Updates the SMC weights according to the new particles. Source code in sbi/inference/abc/smcabc.py def run_lra_update_weights ( self , particles : Tensor , xs : Tensor , observation : Tensor , log_weights : Tensor , lra_with_weights : bool , ) -> Tuple [ Tensor , Tensor ]: \"\"\"Return particles and weights adjusted with LRA. Runs (weighted) linear regression from xs onto particles to adjust the particles. Updates the SMC weights according to the new particles. \"\"\" adjusted_particels = self . run_lra ( theta = particles , x = xs , observation = observation , sample_weight = log_weights . exp () if lra_with_weights else None , ) # Update SMC weights with LRA adjusted weights adjusted_log_weights = self . _calculate_new_log_weights ( new_particles = adjusted_particels , old_particles = particles , old_log_weights = log_weights , ) return adjusted_particels , adjusted_log_weights run_sass_set_xo ( self , num_particles , num_pilot_simulations , x_o , lra = False , sass_expansion_degree = 1 ) \u00b6 Return transform for semi-automatic summary statistics. Runs an single round of rejection abc with fixed budget and accepts num_particles simulations to run the regression for sass. Sets self.x_o once the x_shape can be derived from simulations. Source code in sbi/inference/abc/smcabc.py def run_sass_set_xo ( self , num_particles : int , num_pilot_simulations : int , x_o , lra : bool = False , sass_expansion_degree : int = 1 , ) -> Callable : \"\"\"Return transform for semi-automatic summary statistics. Runs an single round of rejection abc with fixed budget and accepts num_particles simulations to run the regression for sass. Sets self.x_o once the x_shape can be derived from simulations. \"\"\" ( pilot_particles , _ , _ , pilot_xs , ) = self . 
_set_xo_and_sample_initial_population ( x_o , num_particles , num_pilot_simulations ) # Adjust with LRA. if lra : pilot_particles = self . run_lra ( pilot_particles , pilot_xs , self . x_o ) sass_transform = self . get_sass_transform ( pilot_particles , pilot_xs , expansion_degree = sass_expansion_degree , sample_weight = None , ) return sass_transform sample_from_population_with_weights ( particles , weights , num_samples = 1 ) staticmethod \u00b6 Return samples from particles sampled with weights. Source code in sbi/inference/abc/smcabc.py @staticmethod def sample_from_population_with_weights ( particles : Tensor , weights : Tensor , num_samples : int = 1 ) -> Tensor : \"\"\"Return samples from particles sampled with weights.\"\"\" # define multinomial with weights as probs multi = Multinomial ( probs = weights ) # sample num samples, with replacement samples = multi . sample ( sample_shape = torch . Size (( num_samples ,))) # get indices of success trials indices = torch . where ( samples )[ 1 ] # return those indices from trace return particles [ indices ] Posteriors \u00b6 sbi.inference.posteriors.direct_posterior.DirectPosterior ( NeuralPosterior ) \u00b6 Posterior \\(p(\\theta|x_o)\\) with log_prob() and sample() methods, only applicable to SNPE. SNPE trains a neural network to directly approximate the posterior distribution. However, for bounded priors, the neural network can have leakage: it puts non-zero mass in regions where the prior is zero. The DirectPosterior class wraps the trained network to deal with these cases. Specifically, this class offers the following functionality: - correct the calculation of the log probability such that it compensates for the leakage. - reject samples that lie outside of the prior bounds. This class can not be used in combination with SNLE or SNRE. default_x : Optional [ torch . Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. __init__ ( self , posterior_estimator , prior , max_sampling_batch_size = 10000 , device = None , x_shape = None , enable_transform = True ) special \u00b6 Parameters: Name Type Description Default prior Distribution Prior distribution with .log_prob() and .sample() . required posterior_estimator Flow The trained neural posterior. required max_sampling_batch_size int Batchsize of samples being drawn from the proposal at every iteration. 10000 device Optional[str] Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:0\u201d. If None, potential_fn.device is used. None x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None enable_transform bool Whether to transform parameters to unconstrained space during MAP optimization. When False, an identity transform will be returned for theta_transform . True Source code in sbi/inference/posteriors/direct_posterior.py def __init__ ( self , posterior_estimator : flows . Flow , prior : Distribution , max_sampling_batch_size : int = 10_000 , device : Optional [ str ] = None , x_shape : Optional [ torch . Size ] = None , enable_transform : bool = True , ): \"\"\" Args: prior: Prior distribution with `.log_prob()` and `.sample()`. posterior_estimator: The trained neural posterior. max_sampling_batch_size: Batchsize of samples being drawn from the proposal at every iteration. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:0\". If None, `potential_fn.device` is used. 
x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. enable_transform: Whether to transform parameters to unconstrained space during MAP optimization. When False, an identity transform will be returned for `theta_transform`. \"\"\" # Because `DirectPosterior` does not take the `potential_fn` as input, it # builds it itself. The `potential_fn` and `theta_transform` are used only for # obtaining the MAP. check_prior ( prior ) potential_fn , theta_transform = posterior_estimator_based_potential ( posterior_estimator , prior , x_o = None , enable_transform = enable_transform , ) super () . __init__ ( potential_fn = potential_fn , theta_transform = theta_transform , device = device , x_shape = x_shape , ) self . prior = prior self . posterior_estimator = posterior_estimator self . max_sampling_batch_size = max_sampling_batch_size self . _leakage_density_correction_factor = None self . _purpose = \"\"\"It samples the posterior network and rejects samples that lie outside of the prior bounds.\"\"\" leakage_correction ( self , x , num_rejection_samples = 10000 , force_update = False , show_progress_bars = False , rejection_sampling_batch_size = 10000 ) \u00b6 Return leakage correction factor for a leaky posterior density estimate. The factor is estimated from the acceptance probability during rejection sampling from the posterior. This is to avoid re-estimating the acceptance probability from scratch whenever log_prob is called and norm_posterior=True . Here, it is estimated only once for self.default_x and saved for later. We re-evaluate only whenever a new x is passed. Parameters: Name Type Description Default num_rejection_samples int Number of samples used to estimate correction factor. 10000 show_progress_bars bool Whether to show a progress bar during sampling. False rejection_sampling_batch_size int Batch size for rejection sampling. 10000 Returns: Type Description Tensor Saved or newly-estimated correction factor (as a scalar Tensor ). Source code in sbi/inference/posteriors/direct_posterior.py @torch . no_grad () def leakage_correction ( self , x : Tensor , num_rejection_samples : int = 10_000 , force_update : bool = False , show_progress_bars : bool = False , rejection_sampling_batch_size : int = 10_000 , ) -> Tensor : r \"\"\"Return leakage correction factor for a leaky posterior density estimate. The factor is estimated from the acceptance probability during rejection sampling from the posterior. This is to avoid re-estimating the acceptance probability from scratch whenever `log_prob` is called and `norm_posterior=True`. Here, it is estimated only once for `self.default_x` and saved for later. We re-evaluate only whenever a new `x` is passed. Arguments: num_rejection_samples: Number of samples used to estimate correction factor. show_progress_bars: Whether to show a progress bar during sampling. rejection_sampling_batch_size: Batch size for rejection sampling. Returns: Saved or newly-estimated correction factor (as a scalar `Tensor`). \"\"\" def acceptance_at ( x : Tensor ) -> Tensor : return accept_reject_sample ( proposal = self . posterior_estimator , accept_reject_fn = lambda theta : within_support ( self . 
prior , theta ), num_samples = num_rejection_samples , show_progress_bars = show_progress_bars , sample_for_correction_factor = True , max_sampling_batch_size = rejection_sampling_batch_size , proposal_sampling_kwargs = { \"context\" : x }, )[ 1 ] # Check if the provided x matches the default x (short-circuit on identity). is_new_x = self . default_x is None or ( x is not self . default_x and ( x != self . default_x ) . any () ) not_saved_at_default_x = self . _leakage_density_correction_factor is None if is_new_x : # Calculate at x; don't save. return acceptance_at ( x ) elif not_saved_at_default_x or force_update : # Calculate at default_x; save. assert self . default_x is not None self . _leakage_density_correction_factor = acceptance_at ( self . default_x ) return self . _leakage_density_correction_factor # type: ignore log_prob ( self , theta , x = None , norm_posterior = True , track_gradients = False , leakage_correction_params = None ) \u00b6 Returns the log-probability of the posterior \\(p(\\theta|x)\\) . Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required norm_posterior bool Whether to enforce a normalized posterior density. Renormalization of the posterior is useful when some probability falls out or leaks out of the prescribed prior support. The normalizing factor is calculated via rejection sampling, so if you need speedier but unnormalized log posterior estimates set here norm_posterior=False . The returned log posterior is set to -\u221e outside of the prior support regardless of this setting. True track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False leakage_correction_params Optional[dict] A dict of keyword arguments to override the default values of leakage_correction() . Possible options are: num_rejection_samples , force_update , show_progress_bars , and rejection_sampling_batch_size . These parameters only have an effect if norm_posterior=True . None Returns: Type Description Tensor (len(\u03b8),) -shaped log posterior probability \\(\\log p(\\theta|x)\\) for \u03b8 in the support of the prior, -\u221e (corresponding to 0 probability) outside. Source code in sbi/inference/posteriors/direct_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , norm_posterior : bool = True , track_gradients : bool = False , leakage_correction_params : Optional [ dict ] = None , ) -> Tensor : r \"\"\"Returns the log-probability of the posterior $p(\\theta|x)$. Args: theta: Parameters $\\theta$. norm_posterior: Whether to enforce a normalized posterior density. Renormalization of the posterior is useful when some probability falls out or leaks out of the prescribed prior support. The normalizing factor is calculated via rejection sampling, so if you need speedier but unnormalized log posterior estimates set here `norm_posterior=False`. The returned log posterior is set to -\u221e outside of the prior support regardless of this setting. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. leakage_correction_params: A `dict` of keyword arguments to override the default values of `leakage_correction()`. Possible options are: `num_rejection_samples`, `force_update`, `show_progress_bars`, and `rejection_sampling_batch_size`. These parameters only have an effect if `norm_posterior=True`. 
Returns: `(len(\u03b8),)`-shaped log posterior probability $\\log p(\\theta|x)$ for \u03b8 in the support of the prior, -\u221e (corresponding to 0 probability) outside. \"\"\" x = self . _x_else_default_x ( x ) # TODO Train exited here, entered after sampling? self . posterior_estimator . eval () theta = ensure_theta_batched ( torch . as_tensor ( theta )) theta_repeated , x_repeated = match_theta_and_x_batch_shapes ( theta , x ) with torch . set_grad_enabled ( track_gradients ): # Evaluate on device, move back to cpu for comparison with prior. unnorm_log_prob = self . posterior_estimator . log_prob ( theta_repeated , context = x_repeated ) # Force probability to be zero outside prior support. in_prior_support = within_support ( self . prior , theta_repeated ) masked_log_prob = torch . where ( in_prior_support , unnorm_log_prob , torch . tensor ( float ( \"-inf\" ), dtype = torch . float32 , device = self . _device ), ) if leakage_correction_params is None : leakage_correction_params = dict () # use defaults log_factor = ( log ( self . leakage_correction ( x = x , ** leakage_correction_params )) if norm_posterior else 0 ) return masked_log_prob - log_factor map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'posterior' , num_init_samples = 1000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[torch.Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 0.01 init_method Union[str, torch.Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'posterior' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 1000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. 
required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/direct_posterior.py def map ( self , x : Optional [ Tensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , Tensor ] = \"posterior\" , num_init_samples : int = 1_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. 
False Source code in sbi/inference/posteriors/direct_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , max_sampling_batch_size = 10000 , sample_with = None , show_progress_bars = True ) \u00b6 Return samples from posterior distribution \\(p(\\theta|x)\\) . Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw sample_shape.numel() samples and then reshape into the desired shape. torch.Size([]) sample_with Optional[str] This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. If it is set, we instantly raise an error. None show_progress_bars bool Whether to show sampling progress monitor. True Source code in sbi/inference/posteriors/direct_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , max_sampling_batch_size : int = 10_000 , sample_with : Optional [ str ] = None , show_progress_bars : bool = True , ) -> Tensor : r \"\"\"Return samples from posterior distribution $p(\\theta|x)$. Args: sample_shape: Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw `sample_shape.numel()` samples and then reshape into the desired shape. sample_with: This argument only exists to keep backward-compatibility with `sbi` v0.17.2 or older. If it is set, we instantly raise an error. show_progress_bars: Whether to show sampling progress monitor. \"\"\" num_samples = torch . Size ( sample_shape ) . numel () x = self . _x_else_default_x ( x ) max_sampling_batch_size = ( self . max_sampling_batch_size if max_sampling_batch_size is None else max_sampling_batch_size ) if sample_with is not None : raise ValueError ( f \"You set `sample_with= { sample_with } `. As of sbi v0.18.0, setting \" f \"`sample_with` is no longer supported. You have to rerun \" f \"`.build_posterior(sample_with= { sample_with } ).`\" ) samples = accept_reject_sample ( proposal = self . posterior_estimator , accept_reject_fn = lambda theta : within_support ( self . prior , theta ), num_samples = num_samples , show_progress_bars = show_progress_bars , max_sampling_batch_size = max_sampling_batch_size , proposal_sampling_kwargs = { \"context\" : x }, alternative_method = \"build_posterior(..., sample_with='mcmc')\" , )[ 0 ] return samples set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only $ heta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. 
has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p( heta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/direct_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self sbi.inference.posteriors.importance_posterior.ImportanceSamplingPosterior ( NeuralPosterior ) \u00b6 Provides importance sampling to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). ImportanceSamplingPosterior allows to estimate the posterior log-probability by estimating the normlalization constant with importance sampling. It also allows to perform importance sampling (with .sample() ) and to draw approximate samples with sampling-importance-resampling (SIR) (with .sir_sample() ) default_x : Optional [ torch . Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. __init__ ( self , potential_fn , proposal , theta_transform = None , method = 'sir' , oversampling_factor = 32 , max_sampling_batch_size = 10000 , device = None , x_shape = None ) special \u00b6 Parameters: Name Type Description Default potential_fn Callable The potential function from which to draw samples. required proposal Any The proposal distribution. required theta_transform Optional[torch Transform] Transformation that is applied to parameters. Is not used during but only when calling .map() . None method str Either of [ sir | importance ]. This sets the behavior of the .sample() method. With sir , approximate posterior samples are generated with sampling importance resampling (SIR). With importance , the .sample() method returns a tuple of samples and corresponding importance weights. 'sir' oversampling_factor int Number of proposed samples from which only one is selected based on its importance weight. 32 max_sampling_batch_size int The batch size of samples being drawn from the proposal at every iteration. 10000 device Optional[str] Device on which to sample, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:0\u201d. 
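A construction sketch for ImportanceSamplingPosterior. The likelihood_estimator_based_potential helper and the variables prior, theta, x, and x_o are assumptions carried over from the sampler-interface tutorial and the DirectPosterior sketch above, not part of this class itself.

from sbi.inference import SNLE, ImportanceSamplingPosterior
from sbi.inference import likelihood_estimator_based_potential

inference = SNLE(prior=prior)
likelihood_estimator = inference.append_simulations(theta, x).train()
potential_fn, theta_transform = likelihood_estimator_based_potential(
    likelihood_estimator, prior, x_o
)

posterior = ImportanceSamplingPosterior(
    potential_fn,
    proposal=prior,
    theta_transform=theta_transform,
    method="sir",              # sampling-importance-resampling draws
    oversampling_factor=32,
)
samples = posterior.sample((1000,), x=x_o)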
If None, potential_fn.device is used. None x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None Source code in sbi/inference/posteriors/importance_posterior.py def __init__ ( self , potential_fn : Callable , proposal : Any , theta_transform : Optional [ TorchTransform ] = None , method : str = \"sir\" , oversampling_factor : int = 32 , max_sampling_batch_size : int = 10_000 , device : Optional [ str ] = None , x_shape : Optional [ torch . Size ] = None , ): \"\"\" Args: potential_fn: The potential function from which to draw samples. proposal: The proposal distribution. theta_transform: Transformation that is applied to parameters. Is not used during but only when calling `.map()`. method: Either of [`sir`|`importance`]. This sets the behavior of the `.sample()` method. With `sir`, approximate posterior samples are generated with sampling importance resampling (SIR). With `importance`, the `.sample()` method returns a tuple of samples and corresponding importance weights. oversampling_factor: Number of proposed samples from which only one is selected based on its importance weight. max_sampling_batch_size: The batch size of samples being drawn from the proposal at every iteration. device: Device on which to sample, e.g., \"cpu\", \"cuda\" or \"cuda:0\". If None, `potential_fn.device` is used. x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. \"\"\" super () . __init__ ( potential_fn , theta_transform = theta_transform , device = device , x_shape = x_shape , ) self . proposal = proposal self . _normalization_constant = None self . method = method self . oversampling_factor = oversampling_factor self . max_sampling_batch_size = max_sampling_batch_size self . _purpose = ( \"It provides sampling-importance resampling (SIR) to .sample() from the \" \"posterior and can evaluate the _unnormalized_ posterior density with \" \".log_prob().\" ) estimate_normalization_constant ( self , x , num_samples = 10000 , force_update = False ) \u00b6 Returns the normalization constant via importance sampling. Parameters: Name Type Description Default num_samples int Number of importance samples used for the estimate. 10000 force_update bool Whether to re-calculate the normlization constant when x is unchanged and have a cached value. False Source code in sbi/inference/posteriors/importance_posterior.py @torch . no_grad () def estimate_normalization_constant ( self , x : Tensor , num_samples : int = 10_000 , force_update : bool = False ) -> Tensor : \"\"\"Returns the normalization constant via importance sampling. Args: num_samples: Number of importance samples used for the estimate. force_update: Whether to re-calculate the normlization constant when x is unchanged and have a cached value. \"\"\" # Check if the provided x matches the default x (short-circuit on identity). is_new_x = self . default_x is None or ( x is not self . default_x and ( x != self . default_x ) . any () ) not_saved_at_default_x = self . _normalization_constant is None if is_new_x : # Calculate at x; don't save. _ , log_importance_weights = importance_sample ( self . potential_fn , proposal = self . proposal , num_samples = num_samples , ) return torch . mean ( torch . exp ( log_importance_weights )) elif not_saved_at_default_x or force_update : # Calculate at default_x; save. assert self . 
default_x is not None _ , log_importance_weights = importance_sample ( self . potential_fn , proposal = self . proposal , num_samples = num_samples , ) self . _normalization_constant = torch . mean ( torch . exp ( log_importance_weights )) return self . _normalization_constant . to ( self . _device ) # type: ignore log_prob ( self , theta , x = None , track_gradients = False , normalization_constant_params = None ) \u00b6 Returns the log-probability of theta under the posterior. The normalization constant is estimated with importance sampling. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False normalization_constant_params Optional[dict] Parameters passed on to estimate_normalization_constant() . None Returns: Type Description Tensor len($\\theta$) -shaped log-probability. Source code in sbi/inference/posteriors/importance_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False , normalization_constant_params : Optional [ dict ] = None , ) -> Tensor : r \"\"\"Returns the log-probability of theta under the posterior. The normalization constant is estimated with importance sampling. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. normalization_constant_params: Parameters passed on to `estimate_normalization_constant()`. Returns: `len($\\theta$)`-shaped log-probability. \"\"\" x = self . _x_else_default_x ( x ) self . potential_fn . set_x ( x ) theta = ensure_theta_batched ( torch . as_tensor ( theta )) with torch . set_grad_enabled ( track_gradients ): potential_values = self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) if normalization_constant_params is None : normalization_constant_params = dict () # use defaults normalization_constant = self . estimate_normalization_constant ( x , ** normalization_constant_params ) return ( potential_values - torch . log ( normalization_constant )) . to ( self . _device ) map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'proposal' , num_init_samples = 1000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[torch.Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 
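To make the normalization-constant machinery concrete, a short sketch continuing the ImportanceSamplingPosterior from above; the sample sizes are arbitrary choices, not recommendations.

# The normalization constant is estimated by importance sampling against the
# proposal; more samples give a lower-variance estimate of log p(theta|x).
posterior = posterior.set_default_x(x_o)
log_probs = posterior.log_prob(
    samples,
    normalization_constant_params=dict(num_samples=20_000, force_update=True),
)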
0.01 init_method Union[str, torch.Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'proposal' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 1000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/importance_posterior.py def map ( self , x : Optional [ Tensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , Tensor ] = \"proposal\" , num_init_samples : int = 1_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. 
Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Source code in sbi/inference/posteriors/importance_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , oversampling_factor = 32 , max_sampling_batch_size = 10000 , sample_with = None ) \u00b6 Return samples from the approximate posterior distribution. Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] description torch.Size([]) x Optional[torch.Tensor] description None Source code in sbi/inference/posteriors/importance_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , oversampling_factor : int = 32 , max_sampling_batch_size : int = 10_000 , sample_with : Optional [ str ] = None , ) -> Union [ Tensor , Tuple [ Tensor , Tensor ]]: \"\"\"Return samples from the approximate posterior distribution. Args: sample_shape: _description_ x: _description_ \"\"\" if sample_with is not None : raise ValueError ( f \"You set `sample_with= { sample_with } `. As of sbi v0.18.0, setting \" f \"`sample_with` is no longer supported. You have to rerun \" f \"`.build_posterior(sample_with= { sample_with } ).`\" ) self . potential_fn . set_x ( self . _x_else_default_x ( x )) if self . method == \"sir\" : return self . _sir_sample ( sample_shape , oversampling_factor = oversampling_factor , max_sampling_batch_size = max_sampling_batch_size , ) elif self . method == \"importance\" : return self . _importance_sample ( sample_shape ) else : raise NameError set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only $ heta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. 
has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p( heta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/importance_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self sbi.inference.posteriors.mcmc_posterior.MCMCPosterior ( NeuralPosterior ) \u00b6 Provides MCMC to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). MCMCPosterior allows to sample from the posterior with MCMC. default_x : Optional [ torch . Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. mcmc_method : str property writable \u00b6 Returns MCMC method. posterior_sampler property readonly \u00b6 Returns sampler created by sample . __init__ ( self , potential_fn , proposal , theta_transform = None , method = 'slice_np' , thin = 10 , warmup_steps = 10 , num_chains = 1 , init_strategy = 'resample' , init_strategy_parameters = {}, init_strategy_num_candidates = None , num_workers = 1 , device = None , x_shape = None ) special \u00b6 Parameters: Name Type Description Default potential_fn Callable The potential function from which to draw samples. required proposal Any Proposal distribution that is used to initialize the MCMC chain. required theta_transform Optional[torch Transform] Transformation that will be applied during sampling. Allows to perform MCMC in unconstrained space. None method str Method used for MCMC sampling, one of slice_np , slice_np_vectorized , slice , hmc , nuts . slice_np is a custom numpy implementation of slice sampling. slice_np_vectorized is identical to slice_np , but if num_chains>1 , the chains are vectorized for slice_np_vectorized whereas they are run sequentially for slice_np . The samplers hmc , nuts or slice sample with Pyro. 'slice_np' thin int The thinning factor for the chain. 10 warmup_steps int The initial number of samples to discard. 10 num_chains int The number of chains. 
1 init_strategy str The initialisation strategy for chains; proposal will draw init locations from proposal , whereas sir will use Sequential- Importance-Resampling (SIR). SIR initially samples init_strategy_num_candidates from the proposal , evaluates all of them under the potential_fn and proposal , and then resamples the initial locations with weights proportional to exp(potential_fn - proposal.log_prob . resample is the same as sir but uses exp(potential_fn) as weights. 'resample' init_strategy_parameters Dict[str, Any] Dictionary of keyword arguments passed to the init strategy, e.g., for init_strategy=sir this could be num_candidate_samples , i.e., the number of candidates to to find init locations (internal default is 1000 ), or device . {} init_strategy_num_candidates Optional[int] Number of candidates to to find init locations in init_strategy=sir (deprecated, use init_strategy_parameters instead). None num_workers int number of cpu cores used to parallelize mcmc 1 device Optional[str] Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:0\u201d. If None, potential_fn.device is used. None x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None Source code in sbi/inference/posteriors/mcmc_posterior.py def __init__ ( self , potential_fn : Callable , proposal : Any , theta_transform : Optional [ TorchTransform ] = None , method : str = \"slice_np\" , thin : int = 10 , warmup_steps : int = 10 , num_chains : int = 1 , init_strategy : str = \"resample\" , init_strategy_parameters : Dict [ str , Any ] = {}, init_strategy_num_candidates : Optional [ int ] = None , num_workers : int = 1 , device : Optional [ str ] = None , x_shape : Optional [ torch . Size ] = None , ): \"\"\" Args: potential_fn: The potential function from which to draw samples. proposal: Proposal distribution that is used to initialize the MCMC chain. theta_transform: Transformation that will be applied during sampling. Allows to perform MCMC in unconstrained space. method: Method used for MCMC sampling, one of `slice_np`, `slice_np_vectorized`, `slice`, `hmc`, `nuts`. `slice_np` is a custom numpy implementation of slice sampling. `slice_np_vectorized` is identical to `slice_np`, but if `num_chains>1`, the chains are vectorized for `slice_np_vectorized` whereas they are run sequentially for `slice_np`. The samplers `hmc`, `nuts` or `slice` sample with Pyro. thin: The thinning factor for the chain. warmup_steps: The initial number of samples to discard. num_chains: The number of chains. init_strategy: The initialisation strategy for chains; `proposal` will draw init locations from `proposal`, whereas `sir` will use Sequential- Importance-Resampling (SIR). SIR initially samples `init_strategy_num_candidates` from the `proposal`, evaluates all of them under the `potential_fn` and `proposal`, and then resamples the initial locations with weights proportional to `exp(potential_fn - proposal.log_prob`. `resample` is the same as `sir` but uses `exp(potential_fn)` as weights. init_strategy_parameters: Dictionary of keyword arguments passed to the init strategy, e.g., for `init_strategy=sir` this could be `num_candidate_samples`, i.e., the number of candidates to to find init locations (internal default is `1000`), or `device`. init_strategy_num_candidates: Number of candidates to to find init locations in `init_strategy=sir` (deprecated, use init_strategy_parameters instead). 
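A configuration sketch for the MCMCPosterior constructor documented above. The potential_fn and theta_transform are assumed to come from likelihood_estimator_based_potential as in the earlier sketches, and the particular settings are illustrative rather than recommended defaults.

from sbi.inference import MCMCPosterior

posterior = MCMCPosterior(
    potential_fn,
    proposal=prior,
    theta_transform=theta_transform,    # run MCMC in unconstrained space
    method="slice_np_vectorized",       # vectorized slice-sampling chains
    num_chains=20,
    thin=10,
    warmup_steps=50,
    init_strategy="resample",
    init_strategy_parameters={"num_candidate_samples": 10_000},
    num_workers=1,
)
samples = posterior.sample((2_000,), x=x_o)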
num_workers: number of cpu cores used to parallelize mcmc device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:0\". If None, `potential_fn.device` is used. x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. \"\"\" super () . __init__ ( potential_fn , theta_transform = theta_transform , device = device , x_shape = x_shape , ) self . proposal = proposal self . method = method self . thin = thin self . warmup_steps = warmup_steps self . num_chains = num_chains self . init_strategy = init_strategy self . init_strategy_parameters = init_strategy_parameters self . num_workers = num_workers self . _posterior_sampler = None # Hardcode parameter name to reduce clutter kwargs. self . param_name = \"theta\" if init_strategy_num_candidates is not None : warn ( \"\"\"Passing `init_strategy_num_candidates` is deprecated as of sbi v0.19.0. Instead, use e.g., `init_strategy_parameters={\"num_candidate_samples\": 1000}`\"\"\" ) self . init_strategy_parameters [ \"num_candidate_samples\" ] = init_strategy_num_candidates self . potential_ = self . _prepare_potential ( method ) self . _purpose = ( \"It provides MCMC to .sample() from the posterior and \" \"can evaluate the _unnormalized_ posterior density with .log_prob().\" ) get_arviz_inference_data ( self ) \u00b6 Returns arviz InferenceData object constructed most recent samples. Note: the InferenceData is constructed using the posterior samples generated in most recent call to .sample(...) . For Pyro HMC and NUTS kernels InferenceData will contain diagnostics, for Pyro Slice or sbi slice sampling samples, only the samples are added. Returns: Type Description inference_data Arviz InferenceData object. Source code in sbi/inference/posteriors/mcmc_posterior.py def get_arviz_inference_data ( self ) -> InferenceData : \"\"\"Returns arviz InferenceData object constructed most recent samples. Note: the InferenceData is constructed using the posterior samples generated in most recent call to `.sample(...)`. For Pyro HMC and NUTS kernels InferenceData will contain diagnostics, for Pyro Slice or sbi slice sampling samples, only the samples are added. Returns: inference_data: Arviz InferenceData object. \"\"\" assert ( self . _posterior_sampler is not None ), \"\"\"No samples have been generated, call .sample() first.\"\"\" sampler : Union [ MCMC , SliceSamplerSerial , SliceSamplerVectorized ] = self . _posterior_sampler # If Pyro sampler and samples not transformed, use arviz' from_pyro. # Exclude 'slice' kernel as it lacks the 'divergence' diagnostics key. if isinstance ( self . _posterior_sampler , ( HMC , NUTS )) and isinstance ( self . theta_transform , torch_tf . IndependentTransform ): inference_data = az . from_pyro ( sampler ) # otherwise get samples from sampler and transform to original space. else : transformed_samples = sampler . get_samples ( group_by_chain = True ) # Pyro samplers returns dicts, get values. if isinstance ( transformed_samples , Dict ): # popitem gets last items, [1] get the values as tensor. transformed_samples = transformed_samples . popitem ()[ 1 ] # Our slice samplers return numpy arrays. elif isinstance ( transformed_samples , ndarray ): transformed_samples = torch . from_numpy ( transformed_samples ) . type ( torch . float32 ) # For MultipleIndependent priors transforms first dim must be batch dim. # thus, reshape back and forth to have batch dim in front. samples_shape = transformed_samples . shape samples = self . theta_transform . 
inv ( # type: ignore transformed_samples . reshape ( - 1 , samples_shape [ - 1 ]) ) . reshape ( # type: ignore * samples_shape ) inference_data = az . convert_to_inference_data ( { f \" { self . param_name } \" : samples } ) return inference_data log_prob ( self , theta , x = None , track_gradients = False ) \u00b6 Returns the log-probability of theta under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Returns: Type Description Tensor len($\\theta$) -shaped log-probability. Source code in sbi/inference/posteriors/mcmc_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Returns the log-probability of theta under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. Returns: `len($\\theta$)`-shaped log-probability. \"\"\" warn ( \"\"\"`.log_prob()` is deprecated for methods that can only evaluate the log-probability up to a normalizing constant. Use `.potential()` instead.\"\"\" ) warn ( \"The log-probability is unnormalized!\" ) self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'proposal' , num_init_samples = 1000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[torch.Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 0.01 init_method Union[str, torch.Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'proposal' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 1000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 
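The get_arviz_inference_data() method described above plugs into standard ArviZ diagnostics. A brief sketch, assuming .sample(...) has already been called on this MCMCPosterior (the ArviZ calls are the usual plot_trace/summary utilities):

import arviz as az

inference_data = posterior.get_arviz_inference_data()
az.plot_trace(inference_data)       # per-chain trace plots
print(az.summary(inference_data))   # r_hat, effective sample size, etc.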
100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/mcmc_posterior.py def map ( self , x : Optional [ Tensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , Tensor ] = \"proposal\" , num_init_samples : int = 1_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. 
The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Source code in sbi/inference/posteriors/mcmc_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , method = None , thin = None , warmup_steps = None , num_chains = None , init_strategy = None , init_strategy_parameters = None , init_strategy_num_candidates = None , mcmc_parameters = {}, mcmc_method = None , sample_with = None , num_workers = None , show_progress_bars = True ) \u00b6 Return samples from posterior distribution \\(p(\\theta|x)\\) with MCMC. Check the __init__() method for a description of all arguments as well as their default values. Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw sample_shape.numel() samples and then reshape into the desired shape. torch.Size([]) mcmc_parameters Dict Dictionary that is passed only to support the API of sbi v0.17.2 or older. {} mcmc_method Optional[str] This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. Please use method instead. None sample_with Optional[str] This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. If it is set, we instantly raise an error. None show_progress_bars bool Whether to show sampling progress monitor. True Returns: Type Description Tensor Samples from posterior. Source code in sbi/inference/posteriors/mcmc_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , method : Optional [ str ] = None , thin : Optional [ int ] = None , warmup_steps : Optional [ int ] = None , num_chains : Optional [ int ] = None , init_strategy : Optional [ str ] = None , init_strategy_parameters : Optional [ Dict [ str , Any ]] = None , init_strategy_num_candidates : Optional [ int ] = None , mcmc_parameters : Dict = {}, mcmc_method : Optional [ str ] = None , sample_with : Optional [ str ] = None , num_workers : Optional [ int ] = None , show_progress_bars : bool = True , ) -> Tensor : r \"\"\"Return samples from posterior distribution $p(\\theta|x)$ with MCMC. Check the `__init__()` method for a description of all arguments as well as their default values. Args: sample_shape: Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw `sample_shape.numel()` samples and then reshape into the desired shape. 
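The per-call arguments of .sample() override the constructor settings for a single draw, and .potential() (rather than the deprecated .log_prob()) gives unnormalized evaluations. A sketch continuing the MCMCPosterior above; the settings and the gradient example are illustrative assumptions.

# Draw with per-call overrides of the constructor settings.
samples = posterior.sample(
    (5_000,),
    x=x_o,
    method="slice_np_vectorized",
    num_chains=10,
    thin=5,
    warmup_steps=100,
    num_workers=4,
    show_progress_bars=True,
)

# Unnormalized log-posterior values for a batch of parameters.
potential_values = posterior.potential(samples, x=x_o)

# With gradient tracking, e.g. for a sensitivity analysis (assumes the
# potential is differentiable, as it is for neural-network potentials).
theta_grad = samples[:100].clone().requires_grad_(True)
posterior.potential(theta_grad, x=x_o, track_gradients=True).sum().backward()
gradients = theta_grad.grad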
mcmc_parameters: Dictionary that is passed only to support the API of `sbi` v0.17.2 or older. mcmc_method: This argument only exists to keep backward-compatibility with `sbi` v0.17.2 or older. Please use `method` instead. sample_with: This argument only exists to keep backward-compatibility with `sbi` v0.17.2 or older. If it is set, we instantly raise an error. show_progress_bars: Whether to show sampling progress monitor. Returns: Samples from posterior. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) # Replace arguments that were not passed with their default. method = self . method if method is None else method thin = self . thin if thin is None else thin warmup_steps = self . warmup_steps if warmup_steps is None else warmup_steps num_chains = self . num_chains if num_chains is None else num_chains init_strategy = self . init_strategy if init_strategy is None else init_strategy num_workers = self . num_workers if num_workers is None else num_workers init_strategy_parameters = ( self . init_strategy_parameters if init_strategy_parameters is None else init_strategy_parameters ) if init_strategy_num_candidates is not None : warn ( \"\"\"Passing `init_strategy_num_candidates` is deprecated as of sbi v0.19.0. Instead, use e.g., `init_strategy_parameters={\"num_candidate_samples\": 1000}`\"\"\" ) self . init_strategy_parameters [ \"num_candidate_samples\" ] = init_strategy_num_candidates if sample_with is not None : raise ValueError ( f \"You set `sample_with= { sample_with } `. As of sbi v0.18.0, setting \" f \"`sample_with` is no longer supported. You have to rerun \" f \"`.build_posterior(sample_with= { sample_with } ).`\" ) if mcmc_method is not None : warn ( \"You passed `mcmc_method` to `.sample()`. As of sbi v0.18.0, this \" \"is deprecated and will be removed in a future release. Use `method` \" \"instead of `mcmc_method`.\" ) method = mcmc_method if mcmc_parameters : warn ( \"You passed `mcmc_parameters` to `.sample()`. As of sbi v0.18.0, this \" \"is deprecated and will be removed in a future release. Instead, pass \" \"the variable to `.sample()` directly, e.g. \" \"`posterior.sample((1,), num_chains=5)`.\" ) # The following lines are only for backwards compatibility with sbi v0.17.2 or # older. m_p = mcmc_parameters # define to shorten the variable name method = _maybe_use_dict_entry ( method , \"mcmc_method\" , m_p ) thin = _maybe_use_dict_entry ( thin , \"thin\" , m_p ) warmup_steps = _maybe_use_dict_entry ( warmup_steps , \"warmup_steps\" , m_p ) num_chains = _maybe_use_dict_entry ( num_chains , \"num_chains\" , m_p ) init_strategy = _maybe_use_dict_entry ( init_strategy , \"init_strategy\" , m_p ) self . potential_ = self . _prepare_potential ( method ) # type: ignore initial_params = self . _get_initial_params ( init_strategy , # type: ignore num_chains , # type: ignore num_workers , show_progress_bars , ** init_strategy_parameters , ) num_samples = torch . Size ( sample_shape ) . numel () track_gradients = method in ( \"hmc\" , \"nuts\" ) with torch . set_grad_enabled ( track_gradients ): if method in ( \"slice_np\" , \"slice_np_vectorized\" ): transformed_samples = self . _slice_np_mcmc ( num_samples = num_samples , potential_function = self . 
potential_ , initial_params = initial_params , thin = thin , # type: ignore warmup_steps = warmup_steps , # type: ignore vectorized = ( method == \"slice_np_vectorized\" ), num_workers = num_workers , show_progress_bars = show_progress_bars , ) elif method in ( \"hmc\" , \"nuts\" , \"slice\" ): transformed_samples = self . _pyro_mcmc ( num_samples = num_samples , potential_function = self . potential_ , initial_params = initial_params , mcmc_method = method , # type: ignore thin = thin , # type: ignore warmup_steps = warmup_steps , # type: ignore num_chains = num_chains , show_progress_bars = show_progress_bars , ) else : raise NameError samples = self . theta_transform . inv ( transformed_samples ) return samples . reshape (( * sample_shape , - 1 )) # type: ignore set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only $ heta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p( heta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/mcmc_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self set_mcmc_method ( self , method ) \u00b6 Sets sampling method to for MCMC and returns NeuralPosterior . Parameters: Name Type Description Default method str Method to use. required Returns: Type Description NeuralPosterior NeuralPosterior for chainable calls. Source code in sbi/inference/posteriors/mcmc_posterior.py def set_mcmc_method ( self , method : str ) -> \"NeuralPosterior\" : \"\"\"Sets sampling method to for MCMC and returns `NeuralPosterior`. Args: method: Method to use. Returns: `NeuralPosterior` for chainable calls. \"\"\" self . 
_mcmc_method = method return self sbi.inference.posteriors.rejection_posterior.RejectionPosterior ( NeuralPosterior ) \u00b6 Provides rejection sampling to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). RejectionPosterior allows to sample from the posterior with rejection sampling. default_x : Optional [ torch . Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. __init__ ( self , potential_fn , proposal , theta_transform = None , max_sampling_batch_size = 10000 , num_samples_to_find_max = 10000 , num_iter_to_find_max = 100 , m = 1.2 , device = None , x_shape = None ) special \u00b6 Parameters: Name Type Description Default potential_fn Callable The potential function from which to draw samples. required proposal Any The proposal distribution. required theta_transform Optional[torch Transform] Transformation that is applied to parameters. Is not used during but only when calling .map() . None max_sampling_batch_size int The batchsize of samples being drawn from the proposal at every iteration. 10000 num_samples_to_find_max int The number of samples that are used to find the maximum of the potential_fn / proposal ratio. 10000 num_iter_to_find_max int The number of gradient ascent iterations to find the maximum of the potential_fn / proposal ratio. 100 m float Multiplier to the potential_fn / proposal ratio. 1.2 device Optional[str] Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:0\u201d. If None, potential_fn.device is used. None x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None Source code in sbi/inference/posteriors/rejection_posterior.py def __init__ ( self , potential_fn : Callable , proposal : Any , theta_transform : Optional [ TorchTransform ] = None , max_sampling_batch_size : int = 10_000 , num_samples_to_find_max : int = 10_000 , num_iter_to_find_max : int = 100 , m : float = 1.2 , device : Optional [ str ] = None , x_shape : Optional [ torch . Size ] = None , ): \"\"\" Args: potential_fn: The potential function from which to draw samples. proposal: The proposal distribution. theta_transform: Transformation that is applied to parameters. Is not used during but only when calling `.map()`. max_sampling_batch_size: The batchsize of samples being drawn from the proposal at every iteration. num_samples_to_find_max: The number of samples that are used to find the maximum of the `potential_fn / proposal` ratio. num_iter_to_find_max: The number of gradient ascent iterations to find the maximum of the `potential_fn / proposal` ratio. m: Multiplier to the `potential_fn / proposal` ratio. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:0\". If None, `potential_fn.device` is used. x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. \"\"\" super () . __init__ ( potential_fn , theta_transform = theta_transform , device = device , x_shape = x_shape , ) self . proposal = proposal self . max_sampling_batch_size = max_sampling_batch_size self . num_samples_to_find_max = num_samples_to_find_max self . num_iter_to_find_max = num_iter_to_find_max self . m = m self . 
_purpose = ( \"It provides rejection sampling to .sample() from the posterior and \" \"can evaluate the _unnormalized_ posterior density with .log_prob().\" ) log_prob ( self , theta , x = None , track_gradients = False ) \u00b6 Returns the log-probability of theta under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Returns: Type Description Tensor len($\\theta$) -shaped log-probability. Source code in sbi/inference/posteriors/rejection_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Returns the log-probability of theta under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. Returns: `len($\\theta$)`-shaped log-probability. \"\"\" warn ( \"`.log_prob()` is deprecated for methods that can only evaluate the log-probability up to a normalizing constant. Use `.potential()` instead.\" ) warn ( \"The log-probability is unnormalized!\" ) self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'proposal' , num_init_samples = 1000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[torch.Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 0.01 init_method Union[str, torch.Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'proposal' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 1000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. 
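A construction sketch for RejectionPosterior, here fed by a ratio-based potential from SNRE. The ratio_estimator_based_potential helper and the training variables are assumptions carried over from the earlier sketches.

from sbi.inference import SNRE, RejectionPosterior
from sbi.inference import ratio_estimator_based_potential

inference = SNRE(prior=prior)
ratio_estimator = inference.append_simulations(theta, x).train()
potential_fn, theta_transform = ratio_estimator_based_potential(
    ratio_estimator, prior, x_o
)

posterior = RejectionPosterior(
    potential_fn,
    proposal=prior,
    theta_transform=theta_transform,
    max_sampling_batch_size=10_000,
    m=1.2,                     # multiplier on the potential/proposal ratio
)
samples = posterior.sample((1_000,), x=x_o)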
Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/rejection_posterior.py def map ( self , x : Optional [ Tensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , Tensor ] = \"proposal\" , num_init_samples : int = 1_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . 
required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Source code in sbi/inference/posteriors/rejection_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , max_sampling_batch_size = None , num_samples_to_find_max = None , num_iter_to_find_max = None , m = None , sample_with = None , show_progress_bars = True ) \u00b6 Return samples from posterior \\(p(\\theta|x)\\) via rejection sampling. Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw sample_shape.numel() samples and then reshape into the desired shape. torch.Size([]) sample_with Optional[str] This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. If it is set, we instantly raise an error. None show_progress_bars bool Whether to show sampling progress monitor. True Returns: Type Description Samples from posterior. Source code in sbi/inference/posteriors/rejection_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , max_sampling_batch_size : Optional [ int ] = None , num_samples_to_find_max : Optional [ int ] = None , num_iter_to_find_max : Optional [ int ] = None , m : Optional [ float ] = None , sample_with : Optional [ str ] = None , show_progress_bars : bool = True , ): r \"\"\"Return samples from posterior $p(\\theta|x)$ via rejection sampling. Args: sample_shape: Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw `sample_shape.numel()` samples and then reshape into the desired shape. sample_with: This argument only exists to keep backward-compatibility with `sbi` v0.17.2 or older. If it is set, we instantly raise an error. show_progress_bars: Whether to show sampling progress monitor. Returns: Samples from posterior. \"\"\" num_samples = torch . Size ( sample_shape ) . numel () self . potential_fn . set_x ( self . _x_else_default_x ( x )) potential = partial ( self . potential_fn , track_gradients = True ) if sample_with is not None : raise ValueError ( f \"You set `sample_with= { sample_with } `. As of sbi v0.18.0, setting \" f \"`sample_with` is no longer supported. You have to rerun \" f \"`.build_posterior(sample_with= { sample_with } ).`\" ) # Replace arguments that were not passed with their default. max_sampling_batch_size = ( self . max_sampling_batch_size if max_sampling_batch_size is None else max_sampling_batch_size ) num_samples_to_find_max = ( self . num_samples_to_find_max if num_samples_to_find_max is None else num_samples_to_find_max ) num_iter_to_find_max = ( self . 
num_iter_to_find_max if num_iter_to_find_max is None else num_iter_to_find_max ) m = self . m if m is None else m samples , _ = rejection_sample ( potential , proposal = self . proposal , num_samples = num_samples , show_progress_bars = show_progress_bars , warn_acceptance = 0.01 , max_sampling_batch_size = max_sampling_batch_size , num_samples_to_find_max = num_samples_to_find_max , num_iter_to_find_max = num_iter_to_find_max , m = m , device = self . _device , ) return samples . reshape (( * sample_shape , - 1 )) set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only $ heta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p( heta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/rejection_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self sbi.inference.posteriors.vi_posterior.VIPosterior ( NeuralPosterior ) \u00b6 Provides VI (Variational Inference) to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). VIPosterior allows to learn a tractable variational posterior \\(q(\\theta)\\) which approximates the true posterior \\(p(\\theta|x_o)\\) . After this second training stage, we can produce approximate posterior samples, by just sampling from q with no additional cost. For additional information see [1] and [2]. References: [1] Variational methods for simulation-based inference, Manuel Gl\u00f6ckler, Michael Deistler, Jakob Macke, 2022, https://openreview.net/forum?id=kZ0UYdhqkNY [2] Sequential Neural Posterior and Likelihood Approximation, Samuel Wiqvist, Jes Frellsen, Umberto Picchini, 2021, https://arxiv.org/abs/2102.06522 default_x : Optional [ torch . 
Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. q : Distribution property writable \u00b6 Returns the variational posterior. vi_method : str property writable \u00b6 Variational inference method e.g. one of [rKL, fKL, IW, alpha]. __init__ ( self , potential_fn , prior = None , q = 'maf' , theta_transform = None , vi_method = 'rKL' , device = 'cpu' , x_shape = None , parameters = [], modules = []) special \u00b6 Parameters: Name Type Description Default potential_fn Callable The potential function from which to draw samples. required prior Optional[torch Distribution] This is the prior distribution. Note that this is only used to check/construct the variational distribution or within some quality metrics. Please make sure that this matches with the prior within the potential_fn. If None is given, we will try to infer it from potential_fn or q, if this fails we raise an Error. None q Union[str, pyro.distributions.torch.TransformedDistribution, VIPosterior, Callable] Variational distribution, either string, TransformedDistribution , or a VIPosterior object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. You can also specify your own variational family by passing a pyro TransformedDistribution . Additionally, we allow a Callable , which allows you the pass a builder function, which if called returns a distribution. This may be useful for setting the hyperparameters e.g. num_transfroms within the get_flow_builder method specifying the number of transformations within a normalizing flow. If q is already a VIPosterior , then the arguments will be copied from it (relevant for multi-round training). 'maf' theta_transform Optional[torch Transform] Maps form prior support to unconstrained space. The inverse is used here to ensure that the posterior support is equal to that of the prior. None vi_method str This specifies the variational methods which are used to fit q to the posterior. We currently support [rKL, fKL, IW, alpha]. Note that some of the divergences are mode seeking i.e. they underestimate variance and collapse on multimodal targets ( rKL , alpha for alpha > 1) and some are mass covering i.e. they overestimate variance but typically cover all modes ( fKL , IW , alpha for alpha < 1). 'rKL' device str Training device, e.g., cpu , cuda or cuda:0 . We will ensure that all other objects are also on this device. 'cpu' x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None parameters Iterable List of parameters of the variational posterior. This is only required for user-defined q i.e. if q does not have a parameters attribute. [] modules Iterable List of modules of the variational posterior. This is only required for user-defined q i.e. if q does not have a modules attribute. [] Source code in sbi/inference/posteriors/vi_posterior.py def __init__ ( self , potential_fn : Callable , prior : Optional [ TorchDistribution ] = None , q : Union [ str , PyroTransformedDistribution , \"VIPosterior\" , Callable ] = \"maf\" , theta_transform : Optional [ TorchTransform ] = None , vi_method : str = \"rKL\" , device : str = \"cpu\" , x_shape : Optional [ torch . 
Size ] = None , parameters : Iterable = [], modules : Iterable = [], ): \"\"\" Args: potential_fn: The potential function from which to draw samples. prior: This is the prior distribution. Note that this is only used to check/construct the variational distribution or within some quality metrics. Please make sure that this matches with the prior within the potential_fn. If `None` is given, we will try to infer it from potential_fn or q, if this fails we raise an Error. q: Variational distribution, either string, `TransformedDistribution`, or a `VIPosterior` object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. You can also specify your own variational family by passing a pyro `TransformedDistribution`. Additionally, we allow a `Callable`, which allows you the pass a `builder` function, which if called returns a distribution. This may be useful for setting the hyperparameters e.g. `num_transfroms` within the `get_flow_builder` method specifying the number of transformations within a normalizing flow. If q is already a `VIPosterior`, then the arguments will be copied from it (relevant for multi-round training). theta_transform: Maps form prior support to unconstrained space. The inverse is used here to ensure that the posterior support is equal to that of the prior. vi_method: This specifies the variational methods which are used to fit q to the posterior. We currently support [rKL, fKL, IW, alpha]. Note that some of the divergences are `mode seeking` i.e. they underestimate variance and collapse on multimodal targets (`rKL`, `alpha` for alpha > 1) and some are `mass covering` i.e. they overestimate variance but typically cover all modes (`fKL`, `IW`, `alpha` for alpha < 1). device: Training device, e.g., `cpu`, `cuda` or `cuda:0`. We will ensure that all other objects are also on this device. x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. parameters: List of parameters of the variational posterior. This is only required for user-defined q i.e. if q does not have a `parameters` attribute. modules: List of modules of the variational posterior. This is only required for user-defined q i.e. if q does not have a `modules` attribute. \"\"\" super () . __init__ ( potential_fn , theta_transform , device , x_shape = x_shape ) # Especially the prior may be on another device -> move it... self . _device = device self . potential_fn . device = device move_all_tensor_to_device ( self . potential_fn , device ) # Get prior and previous builds if prior is not None : self . _prior = prior elif hasattr ( self . potential_fn , \"prior\" ) and isinstance ( self . potential_fn . prior , Distribution ): self . _prior = self . potential_fn . prior elif isinstance ( q , VIPosterior ) and isinstance ( q . _prior , Distribution ): self . _prior = q . _prior else : raise ValueError ( \"We could not find a suitable prior distribution within `potential_fn`\" \"or `q` (if a VIPosterior is given). Please explicitly specify a prior.\" ) move_all_tensor_to_device ( self . _prior , device ) self . _optimizer = None # In contrast to MCMC we want to project into constrained space. if theta_transform is None : self . link_transform = mcmc_transform ( self . _prior ) . inv else : self . link_transform = theta_transform . 
inv # This will set the variational distribution and VI method self . set_q ( q , parameters = parameters , modules = modules ) self . set_vi_method ( vi_method ) self . _purpose = ( \"It provides Variational inference to .sample() from the posterior and \" \"can evaluate the _normalized_ posterior density with .log_prob().\" ) evaluate ( self , quality_control_metric = 'psis' , N = 50000 ) \u00b6 This function will evaluate the quality of the variational posterior distribution. We currently support two different metrics of type psis , which checks the quality based on the tails of importance weights (there should not be much with a large one), or prop which checks the proportionality between q and potential_fn. NOTE: In our experience prop is sensitive to distinguish good from ok whereas psis is more sensitive in distinguishing very bad from ok . Parameters: Name Type Description Default quality_control_metric str The metric of choice, we currently support [psis, prop, prop_prior]. 'psis' N int Number of samples which is used to evaluate the metric. 50000 Source code in sbi/inference/posteriors/vi_posterior.py def evaluate ( self , quality_control_metric : str = \"psis\" , N : int = int ( 5e4 )) -> None : \"\"\"This function will evaluate the quality of the variational posterior distribution. We currently support two different metrics of type `psis`, which checks the quality based on the tails of importance weights (there should not be much with a large one), or `prop` which checks the proportionality between q and potential_fn. NOTE: In our experience `prop` is sensitive to distinguish ``good`` from ``ok`` whereas `psis` is more sensitive in distinguishing `very bad` from `ok`. Args: quality_control_metric: The metric of choice, we currently support [psis, prop, prop_prior]. N: Number of samples which is used to evaluate the metric. \"\"\" quality_control_fn , quality_control_msg = get_quality_metric ( quality_control_metric ) metric = round ( float ( quality_control_fn ( self , N = N )), 3 ) print ( f \"Quality Score: { metric } \" + quality_control_msg ) log_prob ( self , theta , x = None , track_gradients = False ) \u00b6 Returns the log-probability of theta under the variational posterior. Parameters: Name Type Description Default theta Tensor Parameters required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis but increases memory consumption. False Returns: Type Description Tensor len($\\theta$) -shaped log-probability. Source code in sbi/inference/posteriors/vi_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False , ) -> Tensor : r \"\"\"Returns the log-probability of theta under the variational posterior. Args: theta: Parameters track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis but increases memory consumption. Returns: `len($\\theta$)`-shaped log-probability. \"\"\" x = self . _x_else_default_x ( x ) if self . _trained_on is None or ( x != self . _trained_on ) . all (): raise AttributeError ( f \"The variational posterior was not fit using observation { x } . \\ Please train.\" ) with torch . set_grad_enabled ( track_gradients ): theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . q . 
log_prob ( theta ) map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'proposal' , num_init_samples = 10000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 0.01 init_method Union[str, Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'proposal' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 10000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/vi_posterior.py def map ( self , x : Optional [ TorchTensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , TorchTensor ] = \"proposal\" , num_init_samples : int = 10_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. 
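As the warning above suggests, the default optimizer settings often need hand-tuning. A hypothetical call adjusting them on an already-built posterior (all argument values below are illustrative, not recommendations from the reference):

```python
# Illustrative values only; tune to the problem at hand.
theta_map = posterior.map(
    num_init_samples=5_000,   # draw more candidate starting points
    num_to_optimize=200,      # refine the 200 highest-scoring candidates
    learning_rate=5e-3,       # smaller steps if the log-probability oscillates
    num_iter=2_000,
    show_progress_bars=True,
)
```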
For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" self . proposal = self . q return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Source code in sbi/inference/posteriors/vi_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , ** kwargs ) \u00b6 Samples from the variational posterior distribution. Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Shape of samples torch.Size([]) Returns: Type Description Tensor Samples from posterior. Source code in sbi/inference/posteriors/vi_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , ** kwargs , ) -> Tensor : \"\"\"Samples from the variational posterior distribution. 
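Once trained, the variational posterior `q` is a tractable distribution, so sampling and normalized density evaluation are cheap. A short sketch, assuming `posterior` is a trained `VIPosterior` with a default `x` set:

```python
samples = posterior.sample((1000,))      # shape (1000, parameter_dim), no extra simulator cost
log_q = posterior.log_prob(samples)      # normalized log q(theta), unlike .potential()
batched = posterior.sample((10, 100))    # multidimensional sample_shape -> (10, 100, parameter_dim)
```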
Args: sample_shape: Shape of samples Returns: Samples from posterior. \"\"\" x = self . _x_else_default_x ( x ) if self . _trained_on is None or ( x != self . _trained_on ) . all (): raise AttributeError ( f \"The variational posterior was not fit on the specified `default_x` \" f \" { x } . Please train using `posterior.train()`.\" ) samples = self . q . sample ( torch . Size ( sample_shape )) return samples . reshape (( * sample_shape , samples . shape [ - 1 ])) set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only $ heta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p( heta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/vi_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self set_q ( self , q , parameters = [], modules = []) \u00b6 Defines the variational family. You can specify over which parameters/modules we optimize. This is required for custom distributions which e.g. do not inherit nn.Modules or has the function parameters or modules to give direct access to trainable parameters. Further, you can pass a function, which constructs a variational distribution if called. Parameters: Name Type Description Default q Union[str, pyro.distributions.torch.TransformedDistribution, VIPosterior, Callable] Variational distribution, either string, distribution, or a VIPosterior object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. Of course, you can also specify your own variational family by passing a parameterized distribution object i.e. 
a torch.distributions Distribution with methods parameters returning an iterable of all parameters (you can pass them within the paramters/modules attribute). Additionally, we allow a Callable , which allows you the pass a builder function, which if called returns an distribution. This may be useful for setting the hyperparameters e.g. num_transfroms:int by using the get_flow_builder method specifying the hyperparameters. If q is already a VIPosterior , then the arguments will be copied from it (relevant for multi-round training). required parameters Iterable List of parameters associated with the distribution object. [] modules Iterable List of modules associated with the distribution object. [] Source code in sbi/inference/posteriors/vi_posterior.py def set_q ( self , q : Union [ str , PyroTransformedDistribution , \"VIPosterior\" , Callable ], parameters : Iterable = [], modules : Iterable = [], ) -> None : \"\"\"Defines the variational family. You can specify over which parameters/modules we optimize. This is required for custom distributions which e.g. do not inherit nn.Modules or has the function `parameters` or `modules` to give direct access to trainable parameters. Further, you can pass a function, which constructs a variational distribution if called. Args: q: Variational distribution, either string, distribution, or a VIPosterior object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. Of course, you can also specify your own variational family by passing a `parameterized` distribution object i.e. a torch.distributions Distribution with methods `parameters` returning an iterable of all parameters (you can pass them within the paramters/modules attribute). Additionally, we allow a `Callable`, which allows you the pass a `builder` function, which if called returns an distribution. This may be useful for setting the hyperparameters e.g. `num_transfroms:int` by using the `get_flow_builder` method specifying the hyperparameters. If q is already a `VIPosterior`, then the arguments will be copied from it (relevant for multi-round training). parameters: List of parameters associated with the distribution object. modules: List of modules associated with the distribution object. \"\"\" self . _q_arg = q if isinstance ( q , Distribution ): q = adapt_variational_distribution ( q , self . _prior , self . link_transform , parameters = parameters , modules = modules , ) make_object_deepcopy_compatible ( q ) self_custom_q_init_cache = deepcopy ( q ) self . _q_build_fn = lambda * args , ** kwargs : self_custom_q_init_cache self . _trained_on = None elif isinstance ( q , str ) or isinstance ( q , Callable ): if isinstance ( q , str ): self . _q_build_fn = get_flow_builder ( q ) else : self . _q_build_fn = q q = self . _q_build_fn ( self . _prior . event_shape , self . link_transform , device = self . _device , ) make_object_deepcopy_compatible ( q ) self . _trained_on = None elif isinstance ( q , VIPosterior ): self . _q_build_fn = q . _q_build_fn self . _trained_on = q . _trained_on self . vi_method = q . vi_method # type: ignore self . _device = q . _device self . _prior = q . _prior self . _x = q . _x self . _q_arg = q . _q_arg make_object_deepcopy_compatible ( q . q ) q = deepcopy ( q . q ) move_all_tensor_to_device ( q , self . 
_device ) assert isinstance ( q , Distribution ), \"\"\"Something went wrong when initializing the variational distribution. Please create an issue on github https://github.com/mackelab/sbi/issues\"\"\" check_variational_distribution ( q , self . _prior ) self . _q = q set_vi_method ( self , method ) \u00b6 Sets variational inference method. Parameters: Name Type Description Default method str One of [rKL, fKL, IW, alpha]. required Returns: Type Description VIPosterior VIPosterior for chainable calls. Source code in sbi/inference/posteriors/vi_posterior.py def set_vi_method ( self , method : str ) -> \"VIPosterior\" : \"\"\"Sets variational inference method. Args: method: One of [rKL, fKL, IW, alpha]. Returns: `VIPosterior` for chainable calls. \"\"\" self . _vi_method = method self . _optimizer_builder = get_VI_method ( method ) return self train ( self , x = None , n_particles = 256 , learning_rate = 0.001 , gamma = 0.999 , max_num_iters = 2000 , min_num_iters = 10 , clip_value = 10.0 , warm_up_rounds = 100 , retrain_from_scratch = False , reset_optimizer = False , show_progress_bar = True , check_for_convergence = True , quality_control = True , quality_control_metric = 'psis' , ** kwargs ) \u00b6 This method trains the variational posterior. Parameters: Name Type Description Default x Optional[Tensor] The observation. None n_particles int Number of samples to approximate expectations within the variational bounds. The larger the more accurate are gradient estimates, but the computational cost per iteration increases. 256 learning_rate float Learning rate of the optimizer. 0.001 gamma float Learning rate decay per iteration. We use an exponential decay scheduler. 0.999 max_num_iters int Maximum number of iterations. 2000 min_num_iters int Minimum number of iterations. 10 clip_value float Gradient clipping value, decreasing may help if you see invalid values. 10.0 warm_up_rounds int Initialize the posterior as the prior. 100 retrain_from_scratch bool Retrain the variational distributions from scratch. False reset_optimizer bool Reset the divergence optimizer False show_progress_bar bool If any progress report should be displayed. True quality_control bool If False quality control is skipped. True quality_control_metric str Which metric to use for evaluating the quality. 'psis' kwargs Hyperparameters check corresponding DivergenceOptimizer for detail eps: Determines sensitivity of convergence check. retain_graph: Boolean which decides whether to retain the computation graph. This may be required for some exotic user-specified q\u2019s. optimizer: A PyTorch Optimizer class e.g. Adam or SGD. See DivergenceOptimizer for details. scheduler: A PyTorch learning rate scheduler. See DivergenceOptimizer for details. alpha: Only used if vi_method= alpha . Determines the alpha divergence. K: Only used if vi_method= IW . Determines the number of importance weighted particles. stick_the_landing: If one should use the STL estimator (only for rKL, IW, alpha). dreg: If one should use the DREG estimator (only for rKL, IW, alpha). weight_transform: Callable applied to importance weights (only for fKL) {} Returns: Type Description VIPosterior VIPosterior (can be used to chain calls). 
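Taken together, a minimal end-to-end sketch of the `VIPosterior` workflow, assuming `build_posterior` supports `sample_with="vi"` (as in recent `sbi` versions); the toy Gaussian simulator, observation, and hyperparameters are illustrative. Note that `q` must be re-trained for every new observation.

```python
import torch
from sbi.inference import SNLE
from sbi.utils import BoxUniform

prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
theta = prior.sample((2000,))
x = theta + 0.1 * torch.randn_like(theta)                 # toy stand-in simulator

inference = SNLE(prior=prior)
inference.append_simulations(theta, x).train()

posterior = inference.build_posterior(sample_with="vi")   # returns a VIPosterior
posterior.set_vi_method("fKL")                            # mass-covering divergence
x_o = torch.zeros(1, 2)                                   # assumed observation
posterior.set_default_x(x_o)
posterior.train(learning_rate=1e-3, max_num_iters=1000)   # fits q to this x_o
posterior.evaluate(quality_control_metric="psis")         # also run by train() by default
samples = posterior.sample((1000,))
```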
Source code in sbi/inference/posteriors/vi_posterior.py def train ( self , x : Optional [ TorchTensor ] = None , n_particles : int = 256 , learning_rate : float = 1e-3 , gamma : float = 0.999 , max_num_iters : int = 2000 , min_num_iters : int = 10 , clip_value : float = 10.0 , warm_up_rounds : int = 100 , retrain_from_scratch : bool = False , reset_optimizer : bool = False , show_progress_bar : bool = True , check_for_convergence : bool = True , quality_control : bool = True , quality_control_metric : str = \"psis\" , ** kwargs , ) -> \"VIPosterior\" : \"\"\"This method trains the variational posterior. Args: x: The observation. n_particles: Number of samples to approximate expectations within the variational bounds. The larger the more accurate are gradient estimates, but the computational cost per iteration increases. learning_rate: Learning rate of the optimizer. gamma: Learning rate decay per iteration. We use an exponential decay scheduler. max_num_iters: Maximum number of iterations. min_num_iters: Minimum number of iterations. clip_value: Gradient clipping value, decreasing may help if you see invalid values. warm_up_rounds: Initialize the posterior as the prior. retrain_from_scratch: Retrain the variational distributions from scratch. reset_optimizer: Reset the divergence optimizer show_progress_bar: If any progress report should be displayed. quality_control: If False quality control is skipped. quality_control_metric: Which metric to use for evaluating the quality. kwargs: Hyperparameters check corresponding `DivergenceOptimizer` for detail eps: Determines sensitivity of convergence check. retain_graph: Boolean which decides whether to retain the computation graph. This may be required for some `exotic` user-specified q's. optimizer: A PyTorch Optimizer class e.g. Adam or SGD. See `DivergenceOptimizer` for details. scheduler: A PyTorch learning rate scheduler. See `DivergenceOptimizer` for details. alpha: Only used if vi_method=`alpha`. Determines the alpha divergence. K: Only used if vi_method=`IW`. Determines the number of importance weighted particles. stick_the_landing: If one should use the STL estimator (only for rKL, IW, alpha). dreg: If one should use the DREG estimator (only for rKL, IW, alpha). weight_transform: Callable applied to importance weights (only for fKL) Returns: VIPosterior: `VIPosterior` (can be used to chain calls). \"\"\" # Update optimizer with current arguments. if self . _optimizer is not None : self . _optimizer . update ({ ** locals (), ** kwargs }) # Init q and the optimizer if necessary if retrain_from_scratch : self . q = self . _q_build_fn () # type: ignore self . _optimizer = self . _optimizer_builder ( self . potential_fn , self . q , lr = learning_rate , clip_value = clip_value , gamma = gamma , n_particles = n_particles , prior = self . _prior , ** kwargs , ) if ( reset_optimizer or self . _optimizer is None or not isinstance ( self . _optimizer , self . _optimizer_builder ) ): self . _optimizer = self . _optimizer_builder ( self . potential_fn , self . q , lr = learning_rate , clip_value = clip_value , gamma = gamma , n_particles = n_particles , prior = self . _prior , ** kwargs , ) # Check context x = atleast_2d_float32_tensor ( self . _x_else_default_x ( x )) . to ( # type: ignore self . _device ) already_trained = self . _trained_on is not None and ( x == self . _trained_on ) . all () # Optimize optimizer = self . _optimizer optimizer . to ( self . _device ) optimizer . 
reset_loss_stats () if show_progress_bar : iters = tqdm ( range ( max_num_iters )) else : iters = range ( max_num_iters ) # Warmup before training if reset_optimizer or ( not optimizer . warm_up_was_done and not already_trained ): if show_progress_bar : iters . set_description ( # type: ignore \"Warmup phase, this may take a few seconds...\" ) optimizer . warm_up ( warm_up_rounds ) for i in iters : optimizer . step ( x ) mean_loss , std_loss = optimizer . get_loss_stats () # Update progress bar if show_progress_bar : assert isinstance ( iters , tqdm ) iters . set_description ( # type: ignore f \"Loss: { np . round ( float ( mean_loss ), 2 ) } \" f \"Std: { np . round ( float ( std_loss ), 2 ) } \" ) # Check for convergence if check_for_convergence and i > min_num_iters : if optimizer . converged (): if show_progress_bar : print ( f \" \\n Converged with loss: { np . round ( float ( mean_loss ), 2 ) } \" ) break # Training finished: self . _trained_on = x # Evaluate quality if quality_control : try : self . evaluate ( quality_control_metric = quality_control_metric ) except Exception as e : print ( f \"Quality control did not work, we reset the variational \\ posterior,please check your setting. \\ \\n Following error occured { e } \" ) self . train ( learning_rate = learning_rate * 0.1 , retrain_from_scratch = True , reset_optimizer = True , ) return self Models \u00b6 sbi . utils . get_nn_models . posterior_nn ( model , z_score_theta = 'independent' , z_score_x = 'independent' , hidden_features = 50 , num_transforms = 5 , num_bins = 10 , embedding_net = Identity (), num_components = 10 , ** kwargs ) \u00b6 Returns a function that builds a density estimator for learning the posterior. This function will usually be used for SNPE. The returned function is to be passed to the inference class when using the flexible interface. Parameters: Name Type Description Default model str The type of density estimator that will be created. One of [ mdn , made , maf , maf_rqs , nsf ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 num_transforms int Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a maf or a nsf ). Ignored if density estimator is a mdn or made . 5 num_bins int Number of bins used for the splines in nsf . Ignored if density estimator not nsf . 10 embedding_net Module Optional embedding network for simulation outputs \\(x\\) . This embedding net allows to learn features from potentially high-dimensional simulation outputs. Identity() num_components int Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. 10 kwargs additional custom arguments passed to downstream build functions. 
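A hypothetical usage sketch: the builder returned by `posterior_nn` is passed to the inference class rather than called directly (toy data and hyperparameters below are illustrative):

```python
import torch
from sbi.inference import SNPE
from sbi.utils import BoxUniform
from sbi.utils.get_nn_models import posterior_nn

prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
density_estimator_build_fun = posterior_nn(
    model="nsf", hidden_features=60, num_transforms=3
)
inference = SNPE(prior=prior, density_estimator=density_estimator_build_fun)

theta = prior.sample((1000,))
x = theta + 0.1 * torch.randn_like(theta)      # toy stand-in simulator
inference.append_simulations(theta, x).train()
posterior = inference.build_posterior()
```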
{} Source code in sbi/utils/get_nn_models.py def posterior_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , num_transforms : int = 5 , num_bins : int = 10 , embedding_net : nn . Module = nn . Identity (), num_components : int = 10 , ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a density estimator for learning the posterior. This function will usually be used for SNPE. The returned function is to be passed to the inference class when using the flexible interface. Args: model: The type of density estimator that will be created. One of [`mdn`, `made`, `maf`, `maf_rqs`, `nsf`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. num_transforms: Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a `maf` or a `nsf`). Ignored if density estimator is a `mdn` or `made`. num_bins: Number of bins used for the splines in `nsf`. Ignored if density estimator not `nsf`. embedding_net: Optional embedding network for simulation outputs $x$. This embedding net allows to learn features from potentially high-dimensional simulation outputs. num_components: Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"num_transforms\" , \"num_bins\" , \"embedding_net\" , \"num_components\" , ), ( z_score_theta , z_score_x , hidden_features , num_transforms , num_bins , embedding_net , num_components , ), ), ** kwargs , ) def build_fn_snpe_a ( batch_theta , batch_x , num_components ): \"\"\"Build function for SNPE-A Extract the number of components from the kwargs, such that they are exposed as a kwargs, offering the possibility to later override this kwarg with `functools.partial`. This is necessary in order to make sure that the MDN in SNPE-A only has one component when running the Algorithm 1 part. \"\"\" return build_mdn ( batch_x = batch_theta , batch_y = batch_x , num_components = num_components , ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"mdn\" : return build_mdn ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"made\" : return build_made ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"maf\" : return build_maf ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"maf_rqs\" : return build_maf_rqs ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"nsf\" : return build_nsf ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) else : raise NotImplementedError if model == \"mdn_snpe_a\" : if num_components != 10 : raise ValueError ( \"You set `num_components`. For SNPE-A, this has to be done at \" \"instantiation of the inference object, i.e. 
\" \"`inference = SNPE_A(..., num_components=20)`\" ) kwargs . pop ( \"num_components\" ) return build_fn_snpe_a if model == \"mdn_snpe_a\" else build_fn sbi . utils . get_nn_models . likelihood_nn ( model , z_score_theta = 'independent' , z_score_x = 'independent' , hidden_features = 50 , num_transforms = 5 , num_bins = 10 , embedding_net = Identity (), num_components = 10 , ** kwargs ) \u00b6 Returns a function that builds a density estimator for learning the likelihood. This function will usually be used for SNLE. The returned function is to be passed to the inference class when using the flexible interface. Parameters: Name Type Description Default model str The type of density estimator that will be created. One of [ mdn , made , maf , maf_rqs , nsf ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 num_transforms int Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a maf or a nsf ). Ignored if density estimator is a mdn or made . 5 num_bins int Number of bins used for the splines in nsf . Ignored if density estimator not nsf . 10 embedding_net Module Optional embedding network for parameters \\(\\theta\\) . Identity() num_components int Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. 10 kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def likelihood_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , num_transforms : int = 5 , num_bins : int = 10 , embedding_net : nn . Module = nn . Identity (), num_components : int = 10 , ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a density estimator for learning the likelihood. This function will usually be used for SNLE. The returned function is to be passed to the inference class when using the flexible interface. Args: model: The type of density estimator that will be created. One of [`mdn`, `made`, `maf`, `maf_rqs`, `nsf`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. num_transforms: Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a `maf` or a `nsf`). Ignored if density estimator is a `mdn` or `made`. 
num_bins: Number of bins used for the splines in `nsf`. Ignored if density estimator not `nsf`. embedding_net: Optional embedding network for parameters $\\theta$. num_components: Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"num_transforms\" , \"num_bins\" , \"embedding_net\" , \"num_components\" , ), ( z_score_x , z_score_theta , hidden_features , num_transforms , num_bins , embedding_net , num_components , ), ), ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"mdn\" : return build_mdn ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"made\" : return build_made ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"maf\" : return build_maf ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"maf_rqs\" : return build_maf_rqs ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"nsf\" : return build_nsf ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"mnle\" : return build_mnle ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) else : raise NotImplementedError return build_fn sbi . utils . get_nn_models . classifier_nn ( model , z_score_theta = 'independent' , z_score_x = 'independent' , hidden_features = 50 , embedding_net_theta = Identity (), embedding_net_x = Identity (), ** kwargs ) \u00b6 Returns a function that builds a classifier for learning density ratios. This function will usually be used for SNRE. The returned function is to be passed to the inference class when using the flexible interface. Note that in the view of the SNRE classifier we build below, x=theta and y=x. Parameters: Name Type Description Default model str The type of classifier that will be created. One of [ linear , mlp , resnet ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 embedding_net_theta Module Optional embedding network for parameters \\(\\theta\\) . Identity() embedding_net_x Module Optional embedding network for simulation outputs \\(x\\) . This embedding net allows to learn features from potentially high-dimensional simulation outputs. Identity() kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def classifier_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , embedding_net_theta : nn . Module = nn . Identity (), embedding_net_x : nn . Module = nn . Identity (), ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a classifier for learning density ratios. This function will usually be used for SNRE. 
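Analogously to `posterior_nn` above, a hypothetical sketch of passing a custom classifier builder to SNRE (toy data and settings are illustrative):

```python
import torch
from sbi.inference import SNRE
from sbi.utils import BoxUniform
from sbi.utils.get_nn_models import classifier_nn

prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
classifier_build_fun = classifier_nn(model="resnet", hidden_features=50)
inference = SNRE(prior=prior, classifier=classifier_build_fun)

theta = prior.sample((1000,))
x = theta + 0.1 * torch.randn_like(theta)      # toy stand-in simulator
inference.append_simulations(theta, x).train()
posterior = inference.build_posterior()        # ratio-based posterior, sampled via MCMC by default
```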
The returned function is to be passed to the inference class when using the flexible interface. Note that in the view of the SNRE classifier we build below, x=theta and y=x. Args: model: The type of classifier that will be created. One of [`linear`, `mlp`, `resnet`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. embedding_net_theta: Optional embedding network for parameters $\\theta$. embedding_net_x: Optional embedding network for simulation outputs $x$. This embedding net allows to learn features from potentially high-dimensional simulation outputs. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"embedding_net_x\" , \"embedding_net_y\" , ), ( z_score_theta , z_score_x , hidden_features , embedding_net_theta , embedding_net_x , ), ), ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"linear\" : return build_linear_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) if model == \"mlp\" : return build_mlp_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) if model == \"resnet\" : return build_resnet_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) else : raise NotImplementedError return build_fn Potentials \u00b6 sbi . inference . potentials . posterior_based_potential . posterior_estimator_based_potential ( posterior_estimator , prior , x_o , enable_transform = True ) \u00b6 Returns the potential for posterior-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. The potential is the same as the log-probability of the posterior_estimator , but it is set to \\(-\\inf\\) outside of the prior bounds. Parameters: Name Type Description Default posterior_estimator Module The neural network modelling the posterior. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the posterior. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/posterior_based_potential.py def posterior_estimator_based_potential ( posterior_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns the potential for posterior-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. The potential is the same as the log-probability of the `posterior_estimator`, but it is set to $-\\inf$ outside of the prior bounds. Args: posterior_estimator: The neural network modelling the posterior. prior: The prior distribution. 
x_o: The observed data at which to evaluate the posterior. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( posterior_estimator . parameters ()) . device ) potential_fn = PosteriorBasedPotential ( posterior_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform sbi . inference . potentials . likelihood_based_potential . likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o , enable_transform = True ) \u00b6 Returns potential \\(\\log(p(x_o|\\theta)p(\\theta))\\) for likelihood-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Parameters: Name Type Description Default likelihood_estimator Module The neural network modelling the likelihood. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the likelihood. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function \\(p(x_o|\\theta)p(\\theta)\\) and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/likelihood_based_potential.py def likelihood_estimator_based_potential ( likelihood_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns potential $\\log(p(x_o|\\theta)p(\\theta))$ for likelihood-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Args: likelihood_estimator: The neural network modelling the likelihood. prior: The prior distribution. x_o: The observed data at which to evaluate the likelihood. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function $p(x_o|\\theta)p(\\theta)$ and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( likelihood_estimator . parameters ()) . device ) potential_fn = LikelihoodBasedPotential ( likelihood_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform sbi . inference . potentials . ratio_based_potential . ratio_estimator_based_potential ( ratio_estimator , prior , x_o , enable_transform = True ) \u00b6 Returns the potential for ratio-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Parameters: Name Type Description Default ratio_estimator Module The neural network modelling likelihood-to-evidence ratio. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the likelihood-to-evidence ratio. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . 
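A hypothetical sketch of the potential-based workflow: the returned `potential_fn` and `theta_transform` are plugged into one of the posterior classes documented above. Here `likelihood_estimator`, `prior`, and `x_o` are assumed to come from a previous SNLE training run.

```python
from sbi.inference import MCMCPosterior, likelihood_estimator_based_potential

potential_fn, theta_transform = likelihood_estimator_based_potential(
    likelihood_estimator, prior, x_o
)
posterior = MCMCPosterior(
    potential_fn, proposal=prior, theta_transform=theta_transform
)
samples = posterior.sample((1000,))
```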
True Returns: Type Description Tuple[Callable, torch Transform] The potential function and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/ratio_based_potential.py def ratio_estimator_based_potential ( ratio_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns the potential for ratio-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Args: ratio_estimator: The neural network modelling likelihood-to-evidence ratio. prior: The prior distribution. x_o: The observed data at which to evaluate the likelihood-to-evidence ratio. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( ratio_estimator . parameters ()) . device ) potential_fn = RatioBasedPotential ( ratio_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform Analysis \u00b6 sbi . analysis . plot . pairplot ( samples , points = None , limits = None , subset = None , offdiag = 'hist' , diag = 'hist' , figsize = ( 10 , 10 ), labels = None , ticks = [], upper = None , fig = None , axes = None , ** kwargs ) \u00b6 Plot samples in a 2D grid showing marginals and pairwise marginals. Each of the diagonal plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Each upper-diagonal plot can be interpreted as a 2D-marginal of the distribution. Parameters: Name Type Description Default samples Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Samples used to build the histogram. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] List of additional points to scatter. None limits Union[List, torch.Tensor] Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on). None offdiag Union[str, List[str]] Plotting style for upper diagonal, {hist, scatter, contour, cond, None}. 'hist' upper Optional[str] deprecated, use offdiag instead. None diag Union[str, List[str]] Plotting style for diagonal, {hist, cond, None}. 'hist' figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def pairplot ( samples : Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . 
Tensor ] ] = None , limits : Optional [ Union [ List , torch . Tensor ]] = None , subset : Optional [ List [ int ]] = None , offdiag : Optional [ Union [ List [ str ], str ]] = \"hist\" , diag : Optional [ Union [ List [ str ], str ]] = \"hist\" , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], upper : Optional [ str ] = None , fig = None , axes = None , ** kwargs , ): \"\"\" Plot samples in a 2D grid showing marginals and pairwise marginals. Each of the diagonal plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Each upper-diagonal plot can be interpreted as a 2D-marginal of the distribution. Args: samples: Samples used to build the histogram. points: List of additional points to scatter. limits: Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on). offdiag: Plotting style for upper diagonal, {hist, scatter, contour, cond, None}. upper: deprecated, use offdiag instead. diag: Plotting style for diagonal, {hist, cond, None}. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" # TODO: add color map support # TODO: automatically determine good bin sizes for histograms # TODO: add legend (if legend is True) opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) samples , dim , limits = prepare_for_plot ( samples , limits ) # checks. if opts [ \"legend\" ]: assert len ( opts [ \"samples_labels\" ]) >= len ( samples ), \"Provide at least as many labels as samples.\" if opts [ \"upper\" ] is not None : warn ( \"upper is deprecated, use offdiag instead.\" ) opts [ \"offdiag\" ] = opts [ \"upper\" ] # Prepare diag/upper/lower if type ( opts [ \"diag\" ]) is not list : opts [ \"diag\" ] = [ opts [ \"diag\" ] for _ in range ( len ( samples ))] if type ( opts [ \"offdiag\" ]) is not list : opts [ \"offdiag\" ] = [ opts [ \"offdiag\" ] for _ in range ( len ( samples ))] # if type(opts['lower']) is not list: # opts['lower'] = [opts['lower'] for _ in range(len(samples))] opts [ \"lower\" ] = None diag_func = get_diag_func ( samples , limits , opts , ** kwargs ) def offdiag_func ( row , col , limits , ** kwargs ): if len ( samples ) > 0 : for n , v in enumerate ( samples ): if opts [ \"offdiag\" ][ n ] == \"hist\" or opts [ \"offdiag\" ][ n ] == \"hist2d\" : hist , xedges , yedges = np . histogram2d ( v [:, col ], v [:, row ], range = [ [ limits [ col ][ 0 ], limits [ col ][ 1 ]], [ limits [ row ][ 0 ], limits [ row ][ 1 ]], ], ** opts [ \"hist_offdiag\" ], ) plt . imshow ( hist . 
T , origin = \"lower\" , extent = ( xedges [ 0 ], xedges [ - 1 ], yedges [ 0 ], yedges [ - 1 ], ), aspect = \"auto\" , ) elif opts [ \"offdiag\" ][ n ] in [ \"kde\" , \"kde2d\" , \"contour\" , \"contourf\" , ]: density = gaussian_kde ( v [:, [ col , row ]] . T , bw_method = opts [ \"kde_offdiag\" ][ \"bw_method\" ], ) X , Y = np . meshgrid ( np . linspace ( limits [ col ][ 0 ], limits [ col ][ 1 ], opts [ \"kde_offdiag\" ][ \"bins\" ], ), np . linspace ( limits [ row ][ 0 ], limits [ row ][ 1 ], opts [ \"kde_offdiag\" ][ \"bins\" ], ), ) positions = np . vstack ([ X . ravel (), Y . ravel ()]) Z = np . reshape ( density ( positions ) . T , X . shape ) if opts [ \"offdiag\" ][ n ] == \"kde\" or opts [ \"offdiag\" ][ n ] == \"kde2d\" : plt . imshow ( Z , extent = ( limits [ col ][ 0 ], limits [ col ][ 1 ], limits [ row ][ 0 ], limits [ row ][ 1 ], ), origin = \"lower\" , aspect = \"auto\" , ) elif opts [ \"offdiag\" ][ n ] == \"contour\" : if opts [ \"contour_offdiag\" ][ \"percentile\" ]: Z = probs2contours ( Z , opts [ \"contour_offdiag\" ][ \"levels\" ]) else : Z = ( Z - Z . min ()) / ( Z . max () - Z . min ()) plt . contour ( X , Y , Z , origin = \"lower\" , extent = [ limits [ col ][ 0 ], limits [ col ][ 1 ], limits [ row ][ 0 ], limits [ row ][ 1 ], ], colors = opts [ \"samples_colors\" ][ n ], levels = opts [ \"contour_offdiag\" ][ \"levels\" ], ) else : pass elif opts [ \"offdiag\" ][ n ] == \"scatter\" : plt . scatter ( v [:, col ], v [:, row ], color = opts [ \"samples_colors\" ][ n ], ** opts [ \"scatter_offdiag\" ], ) elif opts [ \"offdiag\" ][ n ] == \"plot\" : plt . plot ( v [:, col ], v [:, row ], color = opts [ \"samples_colors\" ][ n ], ** opts [ \"plot_offdiag\" ], ) else : pass return _arrange_plots ( diag_func , offdiag_func , dim , limits , points , opts , fig = fig , axes = axes ) sbi . analysis . plot . marginal_plot ( samples , points = None , limits = None , subset = None , diag = 'hist' , figsize = ( 10 , 10 ), labels = None , ticks = [], fig = None , axes = None , ** kwargs ) \u00b6 Plot samples in a row showing 1D marginals of selected dimensions. Each of the plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Parameters: Name Type Description Default samples Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Samples used to build the histogram. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] List of additional points to scatter. None limits Union[List, torch.Tensor] Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on). None diag Optional[str] Plotting style for 1D marginals, {hist, kde cond, None}. 'hist' figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] points_colors Colors of the points . required fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. 
{} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def marginal_plot ( samples : Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , limits : Optional [ Union [ List , torch . Tensor ]] = None , subset : Optional [ List [ int ]] = None , diag : Optional [ str ] = \"hist\" , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], fig = None , axes = None , ** kwargs , ): \"\"\" Plot samples in a row showing 1D marginals of selected dimensions. Each of the plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Args: samples: Samples used to build the histogram. points: List of additional points to scatter. limits: Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on). diag: Plotting style for 1D marginals, {hist, kde cond, None}. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. points_colors: Colors of the `points`. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) samples , dim , limits = prepare_for_plot ( samples , limits ) # Prepare diag/upper/lower if type ( opts [ \"diag\" ]) is not list : opts [ \"diag\" ] = [ opts [ \"diag\" ] for _ in range ( len ( samples ))] diag_func = get_diag_func ( samples , limits , opts , ** kwargs ) return _arrange_plots ( diag_func , None , dim , limits , points , opts , fig = fig , axes = axes ) sbi . analysis . plot . conditional_pairplot ( density , condition , limits , points = None , subset = None , resolution = 50 , figsize = ( 10 , 10 ), labels = None , ticks = [], fig = None , axes = None , ** kwargs ) \u00b6 Plot conditional distribution given all other parameters. The conditionals can be interpreted as slices through the density at a location given by condition . For example: Say we have a 3D density with parameters \\(\\theta_0\\) , \\(\\theta_1\\) , \\(\\theta_2\\) and a condition \\(c\\) passed by the user in the condition argument. For the plot of \\(\\theta_0\\) on the diagonal, this will plot the conditional \\(p(\\theta_0 | \\theta_1=c[1], \\theta_2=c[2])\\) . For the upper diagonal of \\(\\theta_1\\) and \\(\\theta_2\\) , it will plot \\(p(\\theta_1, \\theta_2 | \\theta_0=c[0])\\) . All other diagonals and upper-diagonals are built in the corresponding way. Parameters: Name Type Description Default density Any Probability density with a log_prob() method. required condition Tensor Condition that all but the one/two regarded parameters are fixed to. The condition should be of shape (1, dim_theta), i.e. it could e.g. 
be a sample from the posterior distribution. required limits Union[List, torch.Tensor] Limits in between which each parameter will be evaluated. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Additional points to scatter. None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on) None resolution int Resolution of the grid at which we evaluate the pdf . 50 figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] points_colors Colors of the points . required fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def conditional_pairplot ( density : Any , condition : torch . Tensor , limits : Union [ List , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , subset : Optional [ List [ int ]] = None , resolution : int = 50 , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], fig = None , axes = None , ** kwargs , ): r \"\"\" Plot conditional distribution given all other parameters. The conditionals can be interpreted as slices through the `density` at a location given by `condition`. For example: Say we have a 3D density with parameters $\\theta_0$, $\\theta_1$, $\\theta_2$ and a condition $c$ passed by the user in the `condition` argument. For the plot of $\\theta_0$ on the diagonal, this will plot the conditional $p(\\theta_0 | \\theta_1=c[1], \\theta_2=c[2])$. For the upper diagonal of $\\theta_1$ and $\\theta_2$, it will plot $p(\\theta_1, \\theta_2 | \\theta_0=c[0])$. All other diagonals and upper-diagonals are built in the corresponding way. Args: density: Probability density with a `log_prob()` method. condition: Condition that all but the one/two regarded parameters are fixed to. The condition should be of shape (1, dim_theta), i.e. it could e.g. be a sample from the posterior distribution. limits: Limits in between which each parameter will be evaluated. points: Additional points to scatter. subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on) resolution: Resolution of the grid at which we evaluate the `pdf`. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. points_colors: Colors of the `points`. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" device = density . 
_device if hasattr ( density , \"_device\" ) else \"cpu\" # Setting these is required because _pairplot_scaffold will check if opts['diag'] is # `None`. This would break if opts has no key 'diag'. Same for 'upper'. diag = \"cond\" offdiag = \"cond\" opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) opts [ \"lower\" ] = None dim , limits , eps_margins = prepare_for_conditional_plot ( condition , opts ) diag_func = get_conditional_diag_func ( opts , limits , eps_margins , resolution ) def offdiag_func ( row , col , ** kwargs ): p_image = ( eval_conditional_density ( opts [ \"density\" ], opts [ \"condition\" ] . to ( device ), limits . to ( device ), row , col , resolution = resolution , eps_margins1 = eps_margins [ row ], eps_margins2 = eps_margins [ col ], ) . to ( \"cpu\" ) . numpy () ) plt . imshow ( p_image . T , origin = \"lower\" , extent = ( limits [ col , 0 ] . item (), limits [ col , 1 ] . item (), limits [ row , 0 ] . item (), limits [ row , 1 ] . item (), ), aspect = \"auto\" , ) return _arrange_plots ( diag_func , offdiag_func , dim , limits , points , opts , fig = fig , axes = axes ) sbi . analysis . conditional_density . conditional_corrcoeff ( density , limits , condition , subset = None , resolution = 50 ) \u00b6 Returns the conditional correlation matrix of a distribution. To compute the conditional distribution, we condition all but two parameters to values from condition , and then compute the Pearson correlation coefficient \\(\\rho\\) between the remaining two parameters under the distribution density . We do so for any pair of parameters specified in subset , thus creating a matrix containing conditional correlations between any pair of parameters. If condition is a batch of conditions, this function computes the conditional correlation matrix for each one of them and returns the mean. Parameters: Name Type Description Default density Any Probability density function with .log_prob() function. required limits Tensor Limits within which to evaluate the density . required condition Tensor Values to condition the density on. If a batch of conditions is passed, we compute the conditional correlation matrix for each of them and return the average conditional correlation matrix. required subset Optional[List[int]] Evaluate the conditional distribution only on a subset of dimensions. If None this function uses all dimensions. None resolution int Number of grid points on which the conditional distribution is evaluated. A higher value increases the accuracy of the estimated correlation but also increases the computational cost. 50 Returns: Average conditional correlation matrix of shape either (num_dim, num_dim) or (len(subset), len(subset)) if subset was specified. Source code in sbi/analysis/conditional_density.py def conditional_corrcoeff ( density : Any , limits : Tensor , condition : Tensor , subset : Optional [ List [ int ]] = None , resolution : int = 50 , ) -> Tensor : r \"\"\"Returns the conditional correlation matrix of a distribution. To compute the conditional distribution, we condition all but two parameters to values from `condition`, and then compute the Pearson correlation coefficient $\\rho$ between the remaining two parameters under the distribution `density`. We do so for any pair of parameters specified in `subset`, thus creating a matrix containing conditional correlations between any pair of parameters. 
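To see how these conditional-analysis tools are typically combined, here is a minimal sketch; it assumes a trained posterior over three parameters bounded in [-2, 2] whose default observation has already been set (`posterior` is a placeholder from an earlier SNPE run, not part of this reference):

```python
import torch
from sbi.analysis import conditional_corrcoeff, conditional_pairplot

# Condition on a single plausible parameter set, e.g. a posterior sample.
condition = posterior.sample((1,))
limits = torch.tensor([[-2.0, 2.0]] * 3)

# Conditional Pearson correlations between parameter pairs, given `condition`.
corr_mat = conditional_corrcoeff(density=posterior, limits=limits, condition=condition)

# 2D slices through the posterior density at the location given by `condition`.
fig, axes = conditional_pairplot(density=posterior, condition=condition, limits=limits)
```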
If `condition` is a batch of conditions, this function computes the conditional correlation matrix for each one of them and returns the mean. Args: density: Probability density function with `.log_prob()` function. limits: Limits within which to evaluate the `density`. condition: Values to condition the `density` on. If a batch of conditions is passed, we compute the conditional correlation matrix for each of them and return the average conditional correlation matrix. subset: Evaluate the conditional distribution only on a subset of dimensions. If `None` this function uses all dimensions. resolution: Number of grid points on which the conditional distribution is evaluated. A higher value increases the accuracy of the estimated correlation but also increases the computational cost. Returns: Average conditional correlation matrix of shape either `(num_dim, num_dim)` or `(len(subset), len(subset))` if `subset` was specified. \"\"\" device = density . _device if hasattr ( density , \"_device\" ) else \"cpu\" subset_ = subset if subset is not None else range ( condition . shape [ 1 ]) correlation_matrices = [] for cond in condition : correlation_matrices . append ( torch . stack ( [ compute_corrcoeff ( eval_conditional_density ( density , cond . to ( device ), limits . to ( device ), dim1 = dim1 , dim2 = dim2 , resolution = resolution , ), limits [[ dim1 , dim2 ]] . to ( device ), ) for dim1 in subset_ for dim2 in subset_ if dim1 < dim2 ] ) ) average_correlations = torch . mean ( torch . stack ( correlation_matrices ), dim = 0 ) # `average_correlations` is still a vector containing the upper triangular entries. # Below, assemble them into a matrix: av_correlation_matrix = torch . zeros (( len ( subset_ ), len ( subset_ )), device = device ) triu_indices = torch . triu_indices ( row = len ( subset_ ), col = len ( subset_ ), offset = 1 , device = device ) av_correlation_matrix [ triu_indices [ 0 ], triu_indices [ 1 ]] = average_correlations # Make the matrix symmetric by copying upper diagonal to lower diagonal. av_correlation_matrix = torch . triu ( av_correlation_matrix ) + torch . tril ( av_correlation_matrix . T ) av_correlation_matrix . fill_diagonal_ ( 1.0 ) return av_correlation_matrix","title":"API Reference"},{"location":"reference/#api-reference","text":"","title":"API Reference"},{"location":"reference/#inference","text":"","title":"Inference"},{"location":"reference/#sbi.inference.base.infer","text":"Runs simulation-based inference and returns the posterior. This function provides a simple interface to run sbi. Inference is run for a single round and hence the returned posterior \\(p(\\theta|x)\\) can be sampled and evaluated for any \\(x\\) (i.e. it is amortized). The scope of this function is limited to the most essential features of sbi. For more flexibility (e.g. multi-round inference, different density estimators) please use the flexible interface described here: https://www.mackelab.org/sbi/tutorial/02_flexible_interface/ Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\mathrm{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required prior Distribution A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. 
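For instance, any PyTorch distribution already satisfies this prior requirement; a minimal sketch (the three-dimensional standard normal is an arbitrary choice):

```python
import torch
from torch.distributions import MultivariateNormal

# A 3D standard-normal prior; .sample() and .log_prob() are all that `infer` needs.
prior = MultivariateNormal(torch.zeros(3), torch.eye(3))
```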
required method str What inference method to use. Either of SNPE, SNLE or SNRE. required num_simulations int Number of simulation calls. More simulations means a longer runtime, but a better posterior estimate. required num_workers int Number of parallel workers to use for simulations. 1 Returns: Posterior over parameters conditional on observations (amortized). Source code in sbi/inference/base.py def infer ( simulator : Callable , prior : Distribution , method : str , num_simulations : int , num_workers : int = 1 , ) -> NeuralPosterior : r \"\"\"Runs simulation-based inference and returns the posterior. This function provides a simple interface to run sbi. Inference is run for a single round and hence the returned posterior $p(\\theta|x)$ can be sampled and evaluated for any $x$ (i.e. it is amortized). The scope of this function is limited to the most essential features of sbi. For more flexibility (e.g. multi-round inference, different density estimators) please use the flexible interface described here: https://www.mackelab.org/sbi/tutorial/02_flexible_interface/ Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\mathrm{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. method: What inference method to use. Either of SNPE, SNLE or SNRE. num_simulations: Number of simulation calls. More simulations means a longer runtime, but a better posterior estimate. num_workers: Number of parallel workers to use for simulations. Returns: Posterior over parameters conditional on observations (amortized). \"\"\" try : method_fun : Callable = getattr ( sbi . inference , method . upper ()) except AttributeError : raise NameError ( \"Method not available. `method` must be one of 'SNPE', 'SNLE', 'SNRE'.\" ) simulator , prior = prepare_for_sbi ( simulator , prior ) inference = method_fun ( prior = prior ) theta , x = simulate_for_sbi ( simulator = simulator , proposal = prior , num_simulations = num_simulations , num_workers = num_workers , ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () return posterior","title":"infer()"},{"location":"reference/#sbi.utils.user_input_checks.prepare_for_sbi","text":"Prepare simulator and prior for usage in sbi. NOTE: This is a wrapper around process_prior and process_simulator which can be used in isolation as well. Attempts to meet the following requirements by reshaping and type-casting: the simulator function receives as input and returns a Tensor. the simulator can simulate batches of parameters and return batches of data. the prior does not produce batches and samples and evaluates to Tensor. the output shape is a torch.Size((1,N)) (i.e, has a leading batch dimension 1). If this is not possible, a suitable exception will be raised. Parameters: Name Type Description Default simulator Callable Simulator as provided by the user. required prior Prior as provided by the user. required Returns: Type Description Tuple[Callable, torch.distributions.distribution.Distribution] Tuple (simulator, prior) checked and matching the requirements of sbi. 
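Putting these helpers together, a minimal sketch of the flexible interface looks as follows; the toy simulator and the prior bounds are placeholders for a user's own model, not part of this reference:

```python
import torch
from sbi.inference import SNPE, prepare_for_sbi, simulate_for_sbi
from sbi.utils import BoxUniform

def simulator(theta):
    # Toy simulator: parameters plus Gaussian noise (stand-in for a real model).
    return theta + 0.1 * torch.randn_like(theta)

prior = BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))

# Check and wrap simulator and prior, then draw (theta, x) pairs.
simulator, prior = prepare_for_sbi(simulator, prior)
theta, x = simulate_for_sbi(simulator, proposal=prior, num_simulations=1000)

# Train SNPE and build an amortized posterior.
inference = SNPE(prior=prior)
density_estimator = inference.append_simulations(theta, x).train()
posterior = inference.build_posterior(density_estimator)
```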
Source code in sbi/utils/user_input_checks.py def prepare_for_sbi ( simulator : Callable , prior ) -> Tuple [ Callable , Distribution ]: \"\"\"Prepare simulator and prior for usage in sbi. NOTE: This is a wrapper around `process_prior` and `process_simulator` which can be used in isolation as well. Attempts to meet the following requirements by reshaping and type-casting: - the simulator function receives as input and returns a Tensor.
    - the simulator can simulate batches of parameters and return batches of data.
    - the prior does not produce batches, and its samples and log-prob evaluations are Tensors.

    - the output shape is a `torch.Size((1,N))` (i.e, has a leading batch dimension 1). If this is not possible, a suitable exception will be raised. Args: simulator: Simulator as provided by the user. prior: Prior as provided by the user. Returns: Tuple (simulator, prior) checked and matching the requirements of sbi. \"\"\" # Check prior, return PyTorch prior. prior , _ , prior_returns_numpy = process_prior ( prior ) # Check simulator, returns PyTorch simulator able to simulate batches. simulator = process_simulator ( simulator , prior , prior_returns_numpy ) # Consistency check after making ready for sbi. check_sbi_inputs ( simulator , prior ) return simulator , prior","title":"prepare_for_sbi()"},{"location":"reference/#sbi.inference.base.simulate_for_sbi","text":"Returns ( \\(\\theta, x\\) ) pairs obtained from sampling the proposal and simulating. This function performs two steps: Sample parameters \\(\\theta\\) from the proposal . Simulate these parameters to obtain \\(x\\) . Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\text{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required proposal Any Probability distribution that the parameters \\(\\theta\\) are sampled from. required num_simulations int Number of simulations that are run. required num_workers int Number of parallel workers to use for simulations. 1 simulation_batch_size int Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 1 seed Optional[int] Seed for reproducibility. None show_progress_bar bool Whether to show a progress bar for simulating. This will not affect whether there will be a progressbar while drawing samples from the proposal. True Returns: Sampled parameters \\(\\theta\\) and simulation-outputs \\(x\\) . Source code in sbi/inference/base.py def simulate_for_sbi ( simulator : Callable , proposal : Any , num_simulations : int , num_workers : int = 1 , simulation_batch_size : int = 1 , seed : Optional [ int ] = None , show_progress_bar : bool = True , ) -> Tuple [ Tensor , Tensor ]: r \"\"\"Returns ($\\theta, x$) pairs obtained from sampling the proposal and simulating. This function performs two steps: - Sample parameters $\\theta$ from the `proposal`. - Simulate these parameters to obtain $x$. Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\text{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. proposal: Probability distribution that the parameters $\\theta$ are sampled from. num_simulations: Number of simulations that are run. num_workers: Number of parallel workers to use for simulations. simulation_batch_size: Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). seed: Seed for reproducibility. show_progress_bar: Whether to show a progress bar for simulating. This will not affect whether there will be a progressbar while drawing samples from the proposal. Returns: Sampled parameters $\\theta$ and simulation-outputs $x$. \"\"\" theta = proposal . 
sample (( num_simulations ,)) x = simulate_in_batches ( simulator = simulator , theta = theta , sim_batch_size = simulation_batch_size , num_workers = num_workers , seed = seed , show_progress_bars = show_progress_bar , ) return theta , x","title":"simulate_for_sbi()"},{"location":"reference/#sbi.inference.snpe.snpe_a.SNPE_A","text":"","title":"SNPE_A"},{"location":"reference/#sbi.inference.snpe.snpe_c.SNPE_C","text":"","title":"SNPE_C"},{"location":"reference/#sbi.inference.snle.snle_a.SNLE_A","text":"","title":"SNLE_A"},{"location":"reference/#sbi.inference.snre.snre_a.SNRE_A","text":"","title":"SNRE_A"},{"location":"reference/#sbi.inference.snre.snre_b.SNRE_B","text":"","title":"SNRE_B"},{"location":"reference/#sbi.inference.snre.snre_c.SNRE_C","text":"","title":"SNRE_C"},{"location":"reference/#sbi.inference.snre.bnre.BNRE","text":"","title":"BNRE"},{"location":"reference/#sbi.inference.abc.mcabc.MCABC","text":"","title":"MCABC"},{"location":"reference/#sbi.inference.abc.smcabc.SMCABC","text":"","title":"SMCABC"},{"location":"reference/#posteriors","text":"","title":"Posteriors"},{"location":"reference/#sbi.inference.posteriors.direct_posterior.DirectPosterior","text":"Posterior \\(p(\\theta|x_o)\\) with log_prob() and sample() methods, only applicable to SNPE. SNPE trains a neural network to directly approximate the posterior distribution. However, for bounded priors, the neural network can have leakage: it puts non-zero mass in regions where the prior is zero. The DirectPosterior class wraps the trained network to deal with these cases. Specifically, this class offers the following functionality: - correct the calculation of the log probability such that it compensates for the leakage. - reject samples that lie outside of the prior bounds. This class can not be used in combination with SNLE or SNRE.","title":"DirectPosterior"},{"location":"reference/#sbi.inference.posteriors.importance_posterior.ImportanceSamplingPosterior","text":"Provides importance sampling to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). ImportanceSamplingPosterior allows to estimate the posterior log-probability by estimating the normlalization constant with importance sampling. It also allows to perform importance sampling (with .sample() ) and to draw approximate samples with sampling-importance-resampling (SIR) (with .sir_sample() )","title":"ImportanceSamplingPosterior"},{"location":"reference/#sbi.inference.posteriors.mcmc_posterior.MCMCPosterior","text":"Provides MCMC to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). MCMCPosterior allows to sample from the posterior with MCMC.","title":"MCMCPosterior"},{"location":"reference/#sbi.inference.posteriors.rejection_posterior.RejectionPosterior","text":"Provides rejection sampling to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). RejectionPosterior allows to sample from the posterior with rejection sampling.","title":"RejectionPosterior"},{"location":"reference/#sbi.inference.posteriors.vi_posterior.VIPosterior","text":"Provides VI (Variational Inference) to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). VIPosterior allows to learn a tractable variational posterior \\(q(\\theta)\\) which approximates the true posterior \\(p(\\theta|x_o)\\) . 
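As a rough sketch of how such posterior classes are assembled by hand from the potentials documented below, assuming a likelihood estimator trained with SNLE, a `prior`, and an observation `x_o` already exist (all three names are placeholders):

```python
from sbi.inference import MCMCPosterior, likelihood_estimator_based_potential

# Wrap the trained network into the potential log(p(x_o|theta) p(theta)).
potential_fn, theta_transform = likelihood_estimator_based_potential(
    likelihood_estimator, prior, x_o
)

# Sample the potential with MCMC; RejectionPosterior or VIPosterior work analogously.
posterior = MCMCPosterior(potential_fn, proposal=prior, theta_transform=theta_transform)
samples = posterior.sample((1000,))
```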
After this second training stage, we can produce approximate posterior samples, by just sampling from q with no additional cost. For additional information see [1] and [2]. References: [1] Variational methods for simulation-based inference, Manuel Gl\u00f6ckler, Michael Deistler, Jakob Macke, 2022, https://openreview.net/forum?id=kZ0UYdhqkNY [2] Sequential Neural Posterior and Likelihood Approximation, Samuel Wiqvist, Jes Frellsen, Umberto Picchini, 2021, https://arxiv.org/abs/2102.06522","title":"VIPosterior"},{"location":"reference/#models","text":"","title":"Models"},{"location":"reference/#sbi.utils.get_nn_models.posterior_nn","text":"Returns a function that builds a density estimator for learning the posterior. This function will usually be used for SNPE. The returned function is to be passed to the inference class when using the flexible interface. Parameters: Name Type Description Default model str The type of density estimator that will be created. One of [ mdn , made , maf , maf_rqs , nsf ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 num_transforms int Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a maf or a nsf ). Ignored if density estimator is a mdn or made . 5 num_bins int Number of bins used for the splines in nsf . Ignored if density estimator not nsf . 10 embedding_net Module Optional embedding network for simulation outputs \\(x\\) . This embedding net allows to learn features from potentially high-dimensional simulation outputs. Identity() num_components int Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. 10 kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def posterior_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , num_transforms : int = 5 , num_bins : int = 10 , embedding_net : nn . Module = nn . Identity (), num_components : int = 10 , ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a density estimator for learning the posterior. This function will usually be used for SNPE. The returned function is to be passed to the inference class when using the flexible interface. Args: model: The type of density estimator that will be created. One of [`mdn`, `made`, `maf`, `maf_rqs`, `nsf`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 
z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. num_transforms: Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a `maf` or a `nsf`). Ignored if density estimator is a `mdn` or `made`. num_bins: Number of bins used for the splines in `nsf`. Ignored if density estimator not `nsf`. embedding_net: Optional embedding network for simulation outputs $x$. This embedding net allows to learn features from potentially high-dimensional simulation outputs. num_components: Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"num_transforms\" , \"num_bins\" , \"embedding_net\" , \"num_components\" , ), ( z_score_theta , z_score_x , hidden_features , num_transforms , num_bins , embedding_net , num_components , ), ), ** kwargs , ) def build_fn_snpe_a ( batch_theta , batch_x , num_components ): \"\"\"Build function for SNPE-A Extract the number of components from the kwargs, such that they are exposed as a kwargs, offering the possibility to later override this kwarg with `functools.partial`. This is necessary in order to make sure that the MDN in SNPE-A only has one component when running the Algorithm 1 part. \"\"\" return build_mdn ( batch_x = batch_theta , batch_y = batch_x , num_components = num_components , ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"mdn\" : return build_mdn ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"made\" : return build_made ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"maf\" : return build_maf ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"maf_rqs\" : return build_maf_rqs ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"nsf\" : return build_nsf ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) else : raise NotImplementedError if model == \"mdn_snpe_a\" : if num_components != 10 : raise ValueError ( \"You set `num_components`. For SNPE-A, this has to be done at \" \"instantiation of the inference object, i.e. \" \"`inference = SNPE_A(..., num_components=20)`\" ) kwargs . pop ( \"num_components\" ) return build_fn_snpe_a if model == \"mdn_snpe_a\" else build_fn","title":"posterior_nn()"},{"location":"reference/#sbi.utils.get_nn_models.likelihood_nn","text":"Returns a function that builds a density estimator for learning the likelihood. This function will usually be used for SNLE. The returned function is to be passed to the inference class when using the flexible interface. Parameters: Name Type Description Default model str The type of density estimator that will be created. One of [ mdn , made , maf , maf_rqs , nsf ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 
'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 num_transforms int Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a maf or a nsf ). Ignored if density estimator is a mdn or made . 5 num_bins int Number of bins used for the splines in nsf . Ignored if density estimator not nsf . 10 embedding_net Module Optional embedding network for parameters \\(\\theta\\) . Identity() num_components int Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. 10 kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def likelihood_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , num_transforms : int = 5 , num_bins : int = 10 , embedding_net : nn . Module = nn . Identity (), num_components : int = 10 , ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a density estimator for learning the likelihood. This function will usually be used for SNLE. The returned function is to be passed to the inference class when using the flexible interface. Args: model: The type of density estimator that will be created. One of [`mdn`, `made`, `maf`, `maf_rqs`, `nsf`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. num_transforms: Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a `maf` or a `nsf`). Ignored if density estimator is a `mdn` or `made`. num_bins: Number of bins used for the splines in `nsf`. Ignored if density estimator not `nsf`. embedding_net: Optional embedding network for parameters $\\theta$. num_components: Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. kwargs: additional custom arguments passed to downstream build functions. 
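To make the role of these builder functions concrete, here is a short sketch of passing customized density estimators to the flexible interface; `prior` is assumed to be defined elsewhere and the hyperparameters are arbitrary:

```python
from sbi.inference import SNLE, SNPE
from sbi.utils.get_nn_models import likelihood_nn, posterior_nn

# SNPE with a neural spline flow approximating the posterior.
posterior_build_fn = posterior_nn(model="nsf", hidden_features=60, num_transforms=3)
inference = SNPE(prior=prior, density_estimator=posterior_build_fn)

# SNLE with a masked autoregressive flow approximating the likelihood.
likelihood_build_fn = likelihood_nn(model="maf", hidden_features=60)
inference = SNLE(prior=prior, density_estimator=likelihood_build_fn)
```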
\"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"num_transforms\" , \"num_bins\" , \"embedding_net\" , \"num_components\" , ), ( z_score_x , z_score_theta , hidden_features , num_transforms , num_bins , embedding_net , num_components , ), ), ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"mdn\" : return build_mdn ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"made\" : return build_made ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"maf\" : return build_maf ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"maf_rqs\" : return build_maf_rqs ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"nsf\" : return build_nsf ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"mnle\" : return build_mnle ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) else : raise NotImplementedError return build_fn","title":"likelihood_nn()"},{"location":"reference/#sbi.utils.get_nn_models.classifier_nn","text":"Returns a function that builds a classifier for learning density ratios. This function will usually be used for SNRE. The returned function is to be passed to the inference class when using the flexible interface. Note that in the view of the SNRE classifier we build below, x=theta and y=x. Parameters: Name Type Description Default model str The type of classifier that will be created. One of [ linear , mlp , resnet ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 embedding_net_theta Module Optional embedding network for parameters \\(\\theta\\) . Identity() embedding_net_x Module Optional embedding network for simulation outputs \\(x\\) . This embedding net allows to learn features from potentially high-dimensional simulation outputs. Identity() kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def classifier_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , embedding_net_theta : nn . Module = nn . Identity (), embedding_net_x : nn . Module = nn . Identity (), ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a classifier for learning density ratios. This function will usually be used for SNRE. The returned function is to be passed to the inference class when using the flexible interface. Note that in the view of the SNRE classifier we build below, x=theta and y=x. Args: model: The type of classifier that will be created. One of [`linear`, `mlp`, `resnet`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. 
- `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. embedding_net_theta: Optional embedding network for parameters $\\theta$. embedding_net_x: Optional embedding network for simulation outputs $x$. This embedding net allows to learn features from potentially high-dimensional simulation outputs. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"embedding_net_x\" , \"embedding_net_y\" , ), ( z_score_theta , z_score_x , hidden_features , embedding_net_theta , embedding_net_x , ), ), ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"linear\" : return build_linear_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) if model == \"mlp\" : return build_mlp_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) if model == \"resnet\" : return build_resnet_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) else : raise NotImplementedError return build_fn","title":"classifier_nn()"},{"location":"reference/#potentials","text":"","title":"Potentials"},{"location":"reference/#sbi.inference.potentials.posterior_based_potential.posterior_estimator_based_potential","text":"Returns the potential for posterior-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. The potential is the same as the log-probability of the posterior_estimator , but it is set to \\(-\\inf\\) outside of the prior bounds. Parameters: Name Type Description Default posterior_estimator Module The neural network modelling the posterior. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the posterior. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/posterior_based_potential.py def posterior_estimator_based_potential ( posterior_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns the potential for posterior-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. The potential is the same as the log-probability of the `posterior_estimator`, but it is set to $-\\inf$ outside of the prior bounds. Args: posterior_estimator: The neural network modelling the posterior. prior: The prior distribution. x_o: The observed data at which to evaluate the posterior. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( posterior_estimator . parameters ()) . 
device ) potential_fn = PosteriorBasedPotential ( posterior_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform","title":"posterior_estimator_based_potential()"},{"location":"reference/#sbi.inference.potentials.likelihood_based_potential.likelihood_estimator_based_potential","text":"Returns potential \\(\\log(p(x_o|\\theta)p(\\theta))\\) for likelihood-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Parameters: Name Type Description Default likelihood_estimator Module The neural network modelling the likelihood. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the likelihood. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function \\(p(x_o|\\theta)p(\\theta)\\) and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/likelihood_based_potential.py def likelihood_estimator_based_potential ( likelihood_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns potential $\\log(p(x_o|\\theta)p(\\theta))$ for likelihood-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Args: likelihood_estimator: The neural network modelling the likelihood. prior: The prior distribution. x_o: The observed data at which to evaluate the likelihood. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function $p(x_o|\\theta)p(\\theta)$ and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( likelihood_estimator . parameters ()) . device ) potential_fn = LikelihoodBasedPotential ( likelihood_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform","title":"likelihood_estimator_based_potential()"},{"location":"reference/#sbi.inference.potentials.ratio_based_potential.ratio_estimator_based_potential","text":"Returns the potential for ratio-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Parameters: Name Type Description Default ratio_estimator Module The neural network modelling likelihood-to-evidence ratio. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the likelihood-to-evidence ratio. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/ratio_based_potential.py def ratio_estimator_based_potential ( ratio_estimator : nn . 
Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns the potential for ratio-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Args: ratio_estimator: The neural network modelling likelihood-to-evidence ratio. prior: The prior distribution. x_o: The observed data at which to evaluate the likelihood-to-evidence ratio. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( ratio_estimator . parameters ()) . device ) potential_fn = RatioBasedPotential ( ratio_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform","title":"ratio_estimator_based_potential()"},{"location":"reference/#analysis","text":"","title":"Analysis"},{"location":"reference/#sbi.analysis.plot.pairplot","text":"Plot samples in a 2D grid showing marginals and pairwise marginals. Each of the diagonal plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Each upper-diagonal plot can be interpreted as a 2D-marginal of the distribution. Parameters: Name Type Description Default samples Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Samples used to build the histogram. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] List of additional points to scatter. None limits Union[List, torch.Tensor] Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on). None offdiag Union[str, List[str]] Plotting style for upper diagonal, {hist, scatter, contour, cond, None}. 'hist' upper Optional[str] deprecated, use offdiag instead. None diag Union[str, List[str]] Plotting style for diagonal, {hist, cond, None}. 'hist' figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def pairplot ( samples : Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , limits : Optional [ Union [ List , torch . Tensor ]] = None , subset : Optional [ List [ int ]] = None , offdiag : Optional [ Union [ List [ str ], str ]] = \"hist\" , diag : Optional [ Union [ List [ str ], str ]] = \"hist\" , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . 
Tensor ] = [], upper : Optional [ str ] = None , fig = None , axes = None , ** kwargs , ): \"\"\" Plot samples in a 2D grid showing marginals and pairwise marginals. Each of the diagonal plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Each upper-diagonal plot can be interpreted as a 2D-marginal of the distribution. Args: samples: Samples used to build the histogram. points: List of additional points to scatter. limits: Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on). offdiag: Plotting style for upper diagonal, {hist, scatter, contour, cond, None}. upper: deprecated, use offdiag instead. diag: Plotting style for diagonal, {hist, cond, None}. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" # TODO: add color map support # TODO: automatically determine good bin sizes for histograms # TODO: add legend (if legend is True) opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) samples , dim , limits = prepare_for_plot ( samples , limits ) # checks. if opts [ \"legend\" ]: assert len ( opts [ \"samples_labels\" ]) >= len ( samples ), \"Provide at least as many labels as samples.\" if opts [ \"upper\" ] is not None : warn ( \"upper is deprecated, use offdiag instead.\" ) opts [ \"offdiag\" ] = opts [ \"upper\" ] # Prepare diag/upper/lower if type ( opts [ \"diag\" ]) is not list : opts [ \"diag\" ] = [ opts [ \"diag\" ] for _ in range ( len ( samples ))] if type ( opts [ \"offdiag\" ]) is not list : opts [ \"offdiag\" ] = [ opts [ \"offdiag\" ] for _ in range ( len ( samples ))] # if type(opts['lower']) is not list: # opts['lower'] = [opts['lower'] for _ in range(len(samples))] opts [ \"lower\" ] = None diag_func = get_diag_func ( samples , limits , opts , ** kwargs ) def offdiag_func ( row , col , limits , ** kwargs ): if len ( samples ) > 0 : for n , v in enumerate ( samples ): if opts [ \"offdiag\" ][ n ] == \"hist\" or opts [ \"offdiag\" ][ n ] == \"hist2d\" : hist , xedges , yedges = np . histogram2d ( v [:, col ], v [:, row ], range = [ [ limits [ col ][ 0 ], limits [ col ][ 1 ]], [ limits [ row ][ 0 ], limits [ row ][ 1 ]], ], ** opts [ \"hist_offdiag\" ], ) plt . imshow ( hist . T , origin = \"lower\" , extent = ( xedges [ 0 ], xedges [ - 1 ], yedges [ 0 ], yedges [ - 1 ], ), aspect = \"auto\" , ) elif opts [ \"offdiag\" ][ n ] in [ \"kde\" , \"kde2d\" , \"contour\" , \"contourf\" , ]: density = gaussian_kde ( v [:, [ col , row ]] . T , bw_method = opts [ \"kde_offdiag\" ][ \"bw_method\" ], ) X , Y = np . meshgrid ( np . linspace ( limits [ col ][ 0 ], limits [ col ][ 1 ], opts [ \"kde_offdiag\" ][ \"bins\" ], ), np . linspace ( limits [ row ][ 0 ], limits [ row ][ 1 ], opts [ \"kde_offdiag\" ][ \"bins\" ], ), ) positions = np . vstack ([ X . 
ravel (), Y . ravel ()]) Z = np . reshape ( density ( positions ) . T , X . shape ) if opts [ \"offdiag\" ][ n ] == \"kde\" or opts [ \"offdiag\" ][ n ] == \"kde2d\" : plt . imshow ( Z , extent = ( limits [ col ][ 0 ], limits [ col ][ 1 ], limits [ row ][ 0 ], limits [ row ][ 1 ], ), origin = \"lower\" , aspect = \"auto\" , ) elif opts [ \"offdiag\" ][ n ] == \"contour\" : if opts [ \"contour_offdiag\" ][ \"percentile\" ]: Z = probs2contours ( Z , opts [ \"contour_offdiag\" ][ \"levels\" ]) else : Z = ( Z - Z . min ()) / ( Z . max () - Z . min ()) plt . contour ( X , Y , Z , origin = \"lower\" , extent = [ limits [ col ][ 0 ], limits [ col ][ 1 ], limits [ row ][ 0 ], limits [ row ][ 1 ], ], colors = opts [ \"samples_colors\" ][ n ], levels = opts [ \"contour_offdiag\" ][ \"levels\" ], ) else : pass elif opts [ \"offdiag\" ][ n ] == \"scatter\" : plt . scatter ( v [:, col ], v [:, row ], color = opts [ \"samples_colors\" ][ n ], ** opts [ \"scatter_offdiag\" ], ) elif opts [ \"offdiag\" ][ n ] == \"plot\" : plt . plot ( v [:, col ], v [:, row ], color = opts [ \"samples_colors\" ][ n ], ** opts [ \"plot_offdiag\" ], ) else : pass return _arrange_plots ( diag_func , offdiag_func , dim , limits , points , opts , fig = fig , axes = axes )","title":"pairplot()"},{"location":"reference/#sbi.analysis.plot.marginal_plot","text":"Plot samples in a row showing 1D marginals of selected dimensions. Each of the plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Parameters: Name Type Description Default samples Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Samples used to build the histogram. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] List of additional points to scatter. None limits Union[List, torch.Tensor] Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on). None diag Optional[str] Plotting style for 1D marginals, {hist, kde cond, None}. 'hist' figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] points_colors Colors of the points . required fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def marginal_plot ( samples : Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , limits : Optional [ Union [ List , torch . Tensor ]] = None , subset : Optional [ List [ int ]] = None , diag : Optional [ str ] = \"hist\" , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], fig = None , axes = None , ** kwargs , ): \"\"\" Plot samples in a row showing 1D marginals of selected dimensions. 
Each of the plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Args: samples: Samples used to build the histogram. points: List of additional points to scatter. limits: Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on). diag: Plotting style for 1D marginals, {hist, kde cond, None}. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. points_colors: Colors of the `points`. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) samples , dim , limits = prepare_for_plot ( samples , limits ) # Prepare diag/upper/lower if type ( opts [ \"diag\" ]) is not list : opts [ \"diag\" ] = [ opts [ \"diag\" ] for _ in range ( len ( samples ))] diag_func = get_diag_func ( samples , limits , opts , ** kwargs ) return _arrange_plots ( diag_func , None , dim , limits , points , opts , fig = fig , axes = axes )","title":"marginal_plot()"},{"location":"reference/#sbi.analysis.plot.conditional_pairplot","text":"Plot conditional distribution given all other parameters. The conditionals can be interpreted as slices through the density at a location given by condition . For example: Say we have a 3D density with parameters \\(\\theta_0\\) , \\(\\theta_1\\) , \\(\\theta_2\\) and a condition \\(c\\) passed by the user in the condition argument. For the plot of \\(\\theta_0\\) on the diagonal, this will plot the conditional \\(p(\\theta_0 | \\theta_1=c[1], \\theta_2=c[2])\\) . For the upper diagonal of \\(\\theta_1\\) and \\(\\theta_2\\) , it will plot \\(p(\\theta_1, \\theta_2 | \\theta_0=c[0])\\) . All other diagonals and upper-diagonals are built in the corresponding way. Parameters: Name Type Description Default density Any Probability density with a log_prob() method. required condition Tensor Condition that all but the one/two regarded parameters are fixed to. The condition should be of shape (1, dim_theta), i.e. it could e.g. be a sample from the posterior distribution. required limits Union[List, torch.Tensor] Limits in between which each parameter will be evaluated. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Additional points to scatter. None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on) None resolution int Resolution of the grid at which we evaluate the pdf . 50 figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] points_colors Colors of the points . required fig matplotlib figure to plot on. 
None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def conditional_pairplot ( density : Any , condition : torch . Tensor , limits : Union [ List , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , subset : Optional [ List [ int ]] = None , resolution : int = 50 , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], fig = None , axes = None , ** kwargs , ): r \"\"\" Plot conditional distribution given all other parameters. The conditionals can be interpreted as slices through the `density` at a location given by `condition`. For example: Say we have a 3D density with parameters $\\theta_0$, $\\theta_1$, $\\theta_2$ and a condition $c$ passed by the user in the `condition` argument. For the plot of $\\theta_0$ on the diagonal, this will plot the conditional $p(\\theta_0 | \\theta_1=c[1], \\theta_2=c[2])$. For the upper diagonal of $\\theta_1$ and $\\theta_2$, it will plot $p(\\theta_1, \\theta_2 | \\theta_0=c[0])$. All other diagonals and upper-diagonals are built in the corresponding way. Args: density: Probability density with a `log_prob()` method. condition: Condition that all but the one/two regarded parameters are fixed to. The condition should be of shape (1, dim_theta), i.e. it could e.g. be a sample from the posterior distribution. limits: Limits in between which each parameter will be evaluated. points: Additional points to scatter. subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on) resolution: Resolution of the grid at which we evaluate the `pdf`. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. points_colors: Colors of the `points`. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" device = density . _device if hasattr ( density , \"_device\" ) else \"cpu\" # Setting these is required because _pairplot_scaffold will check if opts['diag'] is # `None`. This would break if opts has no key 'diag'. Same for 'upper'. diag = \"cond\" offdiag = \"cond\" opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) opts [ \"lower\" ] = None dim , limits , eps_margins = prepare_for_conditional_plot ( condition , opts ) diag_func = get_conditional_diag_func ( opts , limits , eps_margins , resolution ) def offdiag_func ( row , col , ** kwargs ): p_image = ( eval_conditional_density ( opts [ \"density\" ], opts [ \"condition\" ] . to ( device ), limits . to ( device ), row , col , resolution = resolution , eps_margins1 = eps_margins [ row ], eps_margins2 = eps_margins [ col ], ) . to ( \"cpu\" ) . numpy () ) plt . imshow ( p_image . 
T , origin = \"lower\" , extent = ( limits [ col , 0 ] . item (), limits [ col , 1 ] . item (), limits [ row , 0 ] . item (), limits [ row , 1 ] . item (), ), aspect = \"auto\" , ) return _arrange_plots ( diag_func , offdiag_func , dim , limits , points , opts , fig = fig , axes = axes )","title":"conditional_pairplot()"},{"location":"reference/#sbi.analysis.conditional_density.conditional_corrcoeff","text":"Returns the conditional correlation matrix of a distribution. To compute the conditional distribution, we condition all but two parameters to values from condition , and then compute the Pearson correlation coefficient \\(\\rho\\) between the remaining two parameters under the distribution density . We do so for any pair of parameters specified in subset , thus creating a matrix containing conditional correlations between any pair of parameters. If condition is a batch of conditions, this function computes the conditional correlation matrix for each one of them and returns the mean. Parameters: Name Type Description Default density Any Probability density function with .log_prob() function. required limits Tensor Limits within which to evaluate the density . required condition Tensor Values to condition the density on. If a batch of conditions is passed, we compute the conditional correlation matrix for each of them and return the average conditional correlation matrix. required subset Optional[List[int]] Evaluate the conditional distribution only on a subset of dimensions. If None this function uses all dimensions. None resolution int Number of grid points on which the conditional distribution is evaluated. A higher value increases the accuracy of the estimated correlation but also increases the computational cost. 50 Returns: Average conditional correlation matrix of shape either (num_dim, num_dim) or (len(subset), len(subset)) if subset was specified. Source code in sbi/analysis/conditional_density.py def conditional_corrcoeff ( density : Any , limits : Tensor , condition : Tensor , subset : Optional [ List [ int ]] = None , resolution : int = 50 , ) -> Tensor : r \"\"\"Returns the conditional correlation matrix of a distribution. To compute the conditional distribution, we condition all but two parameters to values from `condition`, and then compute the Pearson correlation coefficient $\\rho$ between the remaining two parameters under the distribution `density`. We do so for any pair of parameters specified in `subset`, thus creating a matrix containing conditional correlations between any pair of parameters. If `condition` is a batch of conditions, this function computes the conditional correlation matrix for each one of them and returns the mean. Args: density: Probability density function with `.log_prob()` function. limits: Limits within which to evaluate the `density`. condition: Values to condition the `density` on. If a batch of conditions is passed, we compute the conditional correlation matrix for each of them and return the average conditional correlation matrix. subset: Evaluate the conditional distribution only on a subset of dimensions. If `None` this function uses all dimensions. resolution: Number of grid points on which the conditional distribution is evaluated. A higher value increases the accuracy of the estimated correlation but also increases the computational cost. Returns: Average conditional correlation matrix of shape either `(num_dim, num_dim)` or `(len(subset), len(subset))` if `subset` was specified. \"\"\" device = density . 
_device if hasattr ( density , \"_device\" ) else \"cpu\" subset_ = subset if subset is not None else range ( condition . shape [ 1 ]) correlation_matrices = [] for cond in condition : correlation_matrices . append ( torch . stack ( [ compute_corrcoeff ( eval_conditional_density ( density , cond . to ( device ), limits . to ( device ), dim1 = dim1 , dim2 = dim2 , resolution = resolution , ), limits [[ dim1 , dim2 ]] . to ( device ), ) for dim1 in subset_ for dim2 in subset_ if dim1 < dim2 ] ) ) average_correlations = torch . mean ( torch . stack ( correlation_matrices ), dim = 0 ) # `average_correlations` is still a vector containing the upper triangular entries. # Below, assemble them into a matrix: av_correlation_matrix = torch . zeros (( len ( subset_ ), len ( subset_ )), device = device ) triu_indices = torch . triu_indices ( row = len ( subset_ ), col = len ( subset_ ), offset = 1 , device = device ) av_correlation_matrix [ triu_indices [ 0 ], triu_indices [ 1 ]] = average_correlations # Make the matrix symmetric by copying upper diagonal to lower diagonal. av_correlation_matrix = torch . triu ( av_correlation_matrix ) + torch . tril ( av_correlation_matrix . T ) av_correlation_matrix . fill_diagonal_ ( 1.0 ) return av_correlation_matrix","title":"conditional_corrcoeff()"},{"location":"examples/00_HH_simulator/","text":"Inference on Hodgkin-Huxley model: tutorial \u00b6 In this tutorial, we use sbi to do inference on a Hodgkin-Huxley model from neuroscience (Hodgkin and Huxley, 1952). We will learn two parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ) based on a current-clamp recording, that we generate synthetically (in practice, this would be an experimental observation). Note, you find the original version of this notebook at https://github.com/sbi-dev/sbi/blob/main/examples/00_HH_simulator.ipynb in the sbi repository. First we are going to import basic packages. import numpy as np import torch # visualization import matplotlib as mpl import matplotlib.pyplot as plt # sbi from sbi import utils as utils from sbi import analysis as analysis from sbi.inference.base import infer # remove top and right axis from plots mpl . rcParams [ \"axes.spines.right\" ] = False mpl . rcParams [ \"axes.spines.top\" ] = False Different required components \u00b6 Before running inference, let us define the different required components: observed data prior over model parameters simulator 1. Observed data \u00b6 Let us assume we current-clamped a neuron and recorded the following voltage trace: In fact, this voltage trace was not measured experimentally but synthetically generated by simulating a Hodgkin-Huxley model with particular parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ). We will come back to this point later in the tutorial. 2. Simulator \u00b6 We would like to infer the posterior over the two parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ) of a Hodgkin-Huxley model, given the observed electrophysiological recording above. The model has channel kinetics as in Pospischil et al. 
2008 , and is defined by the following set of differential equations (parameters of interest highlighted in orange): \\[ \\scriptsize \\begin{align} C_m\\frac{dV}{dt}&=g_1\\left(E_1-V\\right)+ \\color{orange}{\\bar{g}_{Na}}m^3h\\left(E_{Na}-V\\right)+ \\color{orange}{\\bar{g}_{K}}n^4\\left(E_K-V\\right)+ \\bar{g}_Mp\\left(E_K-V\\right)+ I_{inj}+ \\sigma\\eta\\left(t\\right)\\\\ \\frac{dq}{dt}&=\\frac{q_\\infty\\left(V\\right)-q}{\\tau_q\\left(V\\right)},\\;q\\in\\{m,h,n,p\\} \\end{align} \\] Above, \\(V\\) represents the membrane potential, \\(C_m\\) is the membrane capacitance, \\(g_{\\text{l}}\\) is the leak conductance, \\(E_{\\text{l}}\\) is the membrane reversal potential, \\(\\bar{g}_c\\) is the density of channels of type \\(c\\) ( \\(\\text{Na}^+\\) , \\(\\text{K}^+\\) , M), \\(E_c\\) is the reversal potential of \\(c\\) , ( \\(m\\) , \\(h\\) , \\(n\\) , \\(p\\) ) are the respective channel gating kinetic variables, and \\(\\sigma \\eta(t)\\) is the intrinsic neural noise. The right hand side of the voltage dynamics is composed of a leak current, a voltage-dependent \\(\\text{Na}^+\\) current, a delayed-rectifier \\(\\text{K}^+\\) current, a slow voltage-dependent \\(\\text{K}^+\\) current responsible for spike-frequency adaptation, and an injected current \\(I_{\\text{inj}}\\) . Channel gating variables \\(q\\) have dynamics fully characterized by the neuron membrane potential \\(V\\) , given the respective steady-state \\(q_{\\infty}(V)\\) and time constant \\(\\tau_{q}(V)\\) (details in Pospischil et al. 2008). The input current \\(I_{\\text{inj}}\\) is defined as from HH_helper_functions import syn_current I , t_on , t_off , dt , t , A_soma = syn_current () The Hodgkin-Huxley simulator is given by: from HH_helper_functions import HHsimulator Putting the input current and the simulator together: def run_HH_model ( params ): params = np . asarray ( params ) # input current, time step I , t_on , t_off , dt , t , A_soma = syn_current () t = np . arange ( 0 , len ( I ), 1 ) * dt # initial voltage V0 = - 70 states = HHsimulator ( V0 , params . reshape ( 1 , - 1 ), dt , t , I ) return dict ( data = states . reshape ( - 1 ), time = t , dt = dt , I = I . reshape ( - 1 )) To get an idea of the output of the Hodgkin-Huxley model, let us generate some voltage traces for different parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), given the input current \\(I_{\\text{inj}}\\) : # three sets of (g_Na, g_K) params = np . array ([[ 50.0 , 1.0 ], [ 4.0 , 1.5 ], [ 20.0 , 15.0 ]]) num_samples = len ( params [:, 0 ]) sim_samples = np . zeros (( num_samples , len ( I ))) for i in range ( num_samples ): sim_samples [ i , :] = run_HH_model ( params = params [ i , :])[ \"data\" ] # colors for traces col_min = 2 num_colors = num_samples + col_min cm1 = mpl . cm . Blues col1 = [ cm1 ( 1.0 * i / num_colors ) for i in range ( col_min , num_colors )] fig = plt . figure ( figsize = ( 7 , 5 )) gs = mpl . gridspec . GridSpec ( 2 , 1 , height_ratios = [ 4 , 1 ]) ax = plt . subplot ( gs [ 0 ]) for i in range ( num_samples ): plt . plot ( t , sim_samples [ i , :], color = col1 [ i ], lw = 2 ) plt . ylabel ( \"voltage (mV)\" ) ax . set_xticks ([]) ax . set_yticks ([ - 80 , - 20 , 40 ]) ax = plt . subplot ( gs [ 1 ]) plt . plot ( t , I * A_soma * 1e3 , \"k\" , lw = 2 ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"input (nA)\" ) ax . set_xticks ([ 0 , max ( t ) / 2 , max ( t )]) ax . set_yticks ([ 0 , 1.1 * np . max ( I * A_soma * 1e3 )]) ax . yaxis . set_major_formatter ( mpl . ticker . 
FormatStrFormatter ( \" %.2f \" )) plt . show () As can be seen, the voltage traces can be quite diverse for different parameter values. Often, we are not interested in matching the exact trace, but only in matching certain features thereof. In this example of the Hodgkin-Huxley model, the summary features are the number of spikes, the mean resting potential, the standard deviation of the resting potential, and the first four voltage moments: mean, standard deviation, skewness and kurtosis. Using the function calculate_summary_statistics() imported below, we obtain these statistics from the output of the Hodgkin Huxley simulator. from HH_helper_functions import calculate_summary_statistics Lastly, we define a function that performs all of the above steps at once. The function simulation_wrapper takes in conductance values, runs the Hodgkin Huxley model and then returns the summary statistics. def simulation_wrapper ( params ): \"\"\" Returns summary statistics from conductance values in `params`. Summarizes the output of the HH simulator and converts it to `torch.Tensor`. \"\"\" obs = run_HH_model ( params ) summstats = torch . as_tensor ( calculate_summary_statistics ( obs )) return summstats sbi takes any function as simulator. Thus, sbi also has the flexibility to use simulators that utilize external packages, e.g., Brian ( http://briansimulator.org/ ), nest ( https://www.nest-simulator.org/ ), or NEURON ( https://neuron.yale.edu/neuron/ ). External simulators do not even need to be Python-based as long as they store simulation outputs in a format that can be read from Python. All that is necessary is to wrap your external simulator of choice into a Python callable that takes a parameter set and outputs a set of summary statistics we want to fit the parameters to. 3. Prior over model parameters \u00b6 Now that we have the simulator, we need to define a function with the prior over the model parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), which in this case is chosen to be a Uniform distribution: prior_min = [ 0.5 , 1e-4 ] prior_max = [ 80.0 , 15.0 ] prior = utils . torchutils . BoxUniform ( low = torch . as_tensor ( prior_min ), high = torch . as_tensor ( prior_max ) ) Inference \u00b6 Now that we have all the required components, we can run inference with SNPE to identify parameters whose activity matches this trace. posterior = infer ( simulation_wrapper , prior , method = \"SNPE\" , num_simulations = 300 , num_workers = 4 ) HBox(children=(FloatProgress(value=0.0, description='Running 300 simulations in 300 batches.', max=300.0, styl\u2026 Neural network successfully converged after 233 epochs. Note sbi can parallelize your simulator. If you experience problems with parallelization, try setting num_workers=1 and please give us an error report as a GitHub issue . Coming back to the observed data \u00b6 As mentioned at the beginning of the tutorial, the observed data are generated by the Hodgkin-Huxley model with a set of known parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ). To illustrate how to compute the summary statistics of the observed data, let us regenerate the observed data: # true parameters and respective labels true_params = np . array ([ 50.0 , 5.0 ]) labels_params = [ r \"$g_ {Na} $\" , r \"$g_ {K} $\" ] observation_trace = run_HH_model ( true_params ) observation_summary_statistics = calculate_summary_statistics ( observation_trace ) As we already shown above, the observed voltage traces look as follows: fig = plt . figure ( figsize = ( 7 , 5 )) gs = mpl . 
gridspec . GridSpec ( 2 , 1 , height_ratios = [ 4 , 1 ]) ax = plt . subplot ( gs [ 0 ]) plt . plot ( observation_trace [ \"time\" ], observation_trace [ \"data\" ]) plt . ylabel ( \"voltage (mV)\" ) plt . title ( \"observed data\" ) plt . setp ( ax , xticks = [], yticks = [ - 80 , - 20 , 40 ]) ax = plt . subplot ( gs [ 1 ]) plt . plot ( observation_trace [ \"time\" ], I * A_soma * 1e3 , \"k\" , lw = 2 ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"input (nA)\" ) ax . set_xticks ([ 0 , max ( observation_trace [ \"time\" ]) / 2 , max ( observation_trace [ \"time\" ])]) ax . set_yticks ([ 0 , 1.1 * np . max ( I * A_soma * 1e3 )]) ax . yaxis . set_major_formatter ( mpl . ticker . FormatStrFormatter ( \" %.2f \" )) Analysis of the posterior given the observed data \u00b6 After running the inference algorithm, let us inspect the inferred posterior distribution over the parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), given the observed trace. To do so, we first draw samples (i.e. consistent parameter sets) from the posterior: samples = posterior . sample (( 10000 ,), x = observation_summary_statistics ) HBox(children=(FloatProgress(value=0.0, description='Drawing 10000 posterior samples', max=10000.0, style=Prog\u2026 fig , axes = analysis . pairplot ( samples , limits = [[ 0.5 , 80 ], [ 1e-4 , 15.0 ]], ticks = [[ 0.5 , 80 ], [ 1e-4 , 15.0 ]], figsize = ( 5 , 5 ), points = true_params , points_offdiag = { \"markersize\" : 6 }, points_colors = \"r\" , ); As can be seen, the inferred posterior contains the ground-truth parameters (red) in a high-probability region. Now, let us sample parameters from the posterior distribution, simulate the Hodgkin-Huxley model for this parameter set and compare the simulations with the observed data: # Draw a sample from the posterior and convert to numpy for plotting. posterior_sample = posterior . sample (( 1 ,), x = observation_summary_statistics ) . numpy () HBox(children=(FloatProgress(value=0.0, description='Drawing 1 posterior samples', max=1.0, style=ProgressStyl\u2026 fig = plt . figure ( figsize = ( 7 , 5 )) # plot observation t = observation_trace [ \"time\" ] y_obs = observation_trace [ \"data\" ] plt . plot ( t , y_obs , lw = 2 , label = \"observation\" ) # simulate and plot samples x = run_HH_model ( posterior_sample ) plt . plot ( t , x [ \"data\" ], \"--\" , lw = 2 , label = \"posterior sample\" ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"voltage (mV)\" ) ax = plt . gca () handles , labels = ax . get_legend_handles_labels () ax . legend ( handles [:: - 1 ], labels [:: - 1 ], bbox_to_anchor = ( 1.3 , 1 ), loc = \"upper right\" ) ax . set_xticks ([ 0 , 60 , 120 ]) ax . set_yticks ([ - 80 , - 20 , 40 ]); As can be seen, the sample from the inferred posterior leads to simulations that closely resemble the observed data, confirming that SNPE did a good job at capturing the observed data in this simple case. References \u00b6 A. L. Hodgkin and A. F. Huxley. A quantitative description of membrane current and its application to conduction and excitation in nerve. The Journal of Physiology, 117(4):500\u2013544, 1952. M. Pospischil, M. Toledo-Rodriguez, C. Monier, Z. Piwkowska, T. Bal, Y. Fr\u00e9gnac, H. Markram, and A. Destexhe. Minimal Hodgkin-Huxley type models for different classes of cortical and thalamic neurons. 
Biological Cybernetics, 99(4-5), 2008.","title":"Hodgkin-Huxley example"},{"location":"examples/00_HH_simulator/#inference-on-hodgkin-huxley-model-tutorial","text":"In this tutorial, we use sbi to do inference on a Hodgkin-Huxley model from neuroscience (Hodgkin and Huxley, 1952). We will learn two parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ) based on a current-clamp recording, that we generate synthetically (in practice, this would be an experimental observation). Note, you find the original version of this notebook at https://github.com/sbi-dev/sbi/blob/main/examples/00_HH_simulator.ipynb in the sbi repository. First we are going to import basic packages. import numpy as np import torch # visualization import matplotlib as mpl import matplotlib.pyplot as plt # sbi from sbi import utils as utils from sbi import analysis as analysis from sbi.inference.base import infer # remove top and right axis from plots mpl . rcParams [ \"axes.spines.right\" ] = False mpl . rcParams [ \"axes.spines.top\" ] = False","title":"Inference on Hodgkin-Huxley model: tutorial"},{"location":"examples/00_HH_simulator/#different-required-components","text":"Before running inference, let us define the different required components: observed data prior over model parameters simulator","title":"Different required components"},{"location":"examples/00_HH_simulator/#1-observed-data","text":"Let us assume we current-clamped a neuron and recorded the following voltage trace: In fact, this voltage trace was not measured experimentally but synthetically generated by simulating a Hodgkin-Huxley model with particular parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ). We will come back to this point later in the tutorial.","title":"1. Observed data"},{"location":"examples/00_HH_simulator/#2-simulator","text":"We would like to infer the posterior over the two parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ) of a Hodgkin-Huxley model, given the observed electrophysiological recording above. The model has channel kinetics as in Pospischil et al. 2008 , and is defined by the following set of differential equations (parameters of interest highlighted in orange): \\[ \\scriptsize \\begin{align} C_m\\frac{dV}{dt}&=g_1\\left(E_1-V\\right)+ \\color{orange}{\\bar{g}_{Na}}m^3h\\left(E_{Na}-V\\right)+ \\color{orange}{\\bar{g}_{K}}n^4\\left(E_K-V\\right)+ \\bar{g}_Mp\\left(E_K-V\\right)+ I_{inj}+ \\sigma\\eta\\left(t\\right)\\\\ \\frac{dq}{dt}&=\\frac{q_\\infty\\left(V\\right)-q}{\\tau_q\\left(V\\right)},\\;q\\in\\{m,h,n,p\\} \\end{align} \\] Above, \\(V\\) represents the membrane potential, \\(C_m\\) is the membrane capacitance, \\(g_{\\text{l}}\\) is the leak conductance, \\(E_{\\text{l}}\\) is the membrane reversal potential, \\(\\bar{g}_c\\) is the density of channels of type \\(c\\) ( \\(\\text{Na}^+\\) , \\(\\text{K}^+\\) , M), \\(E_c\\) is the reversal potential of \\(c\\) , ( \\(m\\) , \\(h\\) , \\(n\\) , \\(p\\) ) are the respective channel gating kinetic variables, and \\(\\sigma \\eta(t)\\) is the intrinsic neural noise. The right hand side of the voltage dynamics is composed of a leak current, a voltage-dependent \\(\\text{Na}^+\\) current, a delayed-rectifier \\(\\text{K}^+\\) current, a slow voltage-dependent \\(\\text{K}^+\\) current responsible for spike-frequency adaptation, and an injected current \\(I_{\\text{inj}}\\) . 
Channel gating variables \\(q\\) have dynamics fully characterized by the neuron membrane potential \\(V\\) , given the respective steady-state \\(q_{\\infty}(V)\\) and time constant \\(\\tau_{q}(V)\\) (details in Pospischil et al. 2008). The input current \\(I_{\\text{inj}}\\) is defined as from HH_helper_functions import syn_current I , t_on , t_off , dt , t , A_soma = syn_current () The Hodgkin-Huxley simulator is given by: from HH_helper_functions import HHsimulator Putting the input current and the simulator together: def run_HH_model ( params ): params = np . asarray ( params ) # input current, time step I , t_on , t_off , dt , t , A_soma = syn_current () t = np . arange ( 0 , len ( I ), 1 ) * dt # initial voltage V0 = - 70 states = HHsimulator ( V0 , params . reshape ( 1 , - 1 ), dt , t , I ) return dict ( data = states . reshape ( - 1 ), time = t , dt = dt , I = I . reshape ( - 1 )) To get an idea of the output of the Hodgkin-Huxley model, let us generate some voltage traces for different parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), given the input current \\(I_{\\text{inj}}\\) : # three sets of (g_Na, g_K) params = np . array ([[ 50.0 , 1.0 ], [ 4.0 , 1.5 ], [ 20.0 , 15.0 ]]) num_samples = len ( params [:, 0 ]) sim_samples = np . zeros (( num_samples , len ( I ))) for i in range ( num_samples ): sim_samples [ i , :] = run_HH_model ( params = params [ i , :])[ \"data\" ] # colors for traces col_min = 2 num_colors = num_samples + col_min cm1 = mpl . cm . Blues col1 = [ cm1 ( 1.0 * i / num_colors ) for i in range ( col_min , num_colors )] fig = plt . figure ( figsize = ( 7 , 5 )) gs = mpl . gridspec . GridSpec ( 2 , 1 , height_ratios = [ 4 , 1 ]) ax = plt . subplot ( gs [ 0 ]) for i in range ( num_samples ): plt . plot ( t , sim_samples [ i , :], color = col1 [ i ], lw = 2 ) plt . ylabel ( \"voltage (mV)\" ) ax . set_xticks ([]) ax . set_yticks ([ - 80 , - 20 , 40 ]) ax = plt . subplot ( gs [ 1 ]) plt . plot ( t , I * A_soma * 1e3 , \"k\" , lw = 2 ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"input (nA)\" ) ax . set_xticks ([ 0 , max ( t ) / 2 , max ( t )]) ax . set_yticks ([ 0 , 1.1 * np . max ( I * A_soma * 1e3 )]) ax . yaxis . set_major_formatter ( mpl . ticker . FormatStrFormatter ( \" %.2f \" )) plt . show () As can be seen, the voltage traces can be quite diverse for different parameter values. Often, we are not interested in matching the exact trace, but only in matching certain features thereof. In this example of the Hodgkin-Huxley model, the summary features are the number of spikes, the mean resting potential, the standard deviation of the resting potential, and the first four voltage moments: mean, standard deviation, skewness and kurtosis. Using the function calculate_summary_statistics() imported below, we obtain these statistics from the output of the Hodgkin Huxley simulator. from HH_helper_functions import calculate_summary_statistics Lastly, we define a function that performs all of the above steps at once. The function simulation_wrapper takes in conductance values, runs the Hodgkin Huxley model and then returns the summary statistics. def simulation_wrapper ( params ): \"\"\" Returns summary statistics from conductance values in `params`. Summarizes the output of the HH simulator and converts it to `torch.Tensor`. \"\"\" obs = run_HH_model ( params ) summstats = torch . as_tensor ( calculate_summary_statistics ( obs )) return summstats sbi takes any function as simulator. 
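For instance, a minimal, hypothetical sketch of such a wrapper (the `toy_external_simulator` below is a stand-in, not part of this tutorial): any Python callable that maps a parameter vector to a tensor of summary statistics can be used.

```python
import numpy as np
import torch

def toy_external_simulator(g_na, g_k):
    # Stand-in for an arbitrary simulator; in practice you could launch an
    # external tool here and read its output back into Python.
    return np.array([g_na + g_k, g_na * g_k, abs(g_na - g_k)])

def external_simulation_wrapper(params):
    # The callable sbi sees: parameters in, summary statistics out.
    params = np.asarray(params).reshape(-1)
    stats = toy_external_simulator(params[0], params[1])
    return torch.as_tensor(stats, dtype=torch.float32)
```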
Thus, sbi also has the flexibility to use simulators that utilize external packages, e.g., Brian ( http://briansimulator.org/ ), nest ( https://www.nest-simulator.org/ ), or NEURON ( https://neuron.yale.edu/neuron/ ). External simulators do not even need to be Python-based as long as they store simulation outputs in a format that can be read from Python. All that is necessary is to wrap your external simulator of choice into a Python callable that takes a parameter set and outputs a set of summary statistics we want to fit the parameters to.","title":"2. Simulator"},{"location":"examples/00_HH_simulator/#3-prior-over-model-parameters","text":"Now that we have the simulator, we need to define a function with the prior over the model parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), which in this case is chosen to be a Uniform distribution: prior_min = [ 0.5 , 1e-4 ] prior_max = [ 80.0 , 15.0 ] prior = utils . torchutils . BoxUniform ( low = torch . as_tensor ( prior_min ), high = torch . as_tensor ( prior_max ) )","title":"3. Prior over model parameters"},{"location":"examples/00_HH_simulator/#inference","text":"Now that we have all the required components, we can run inference with SNPE to identify parameters whose activity matches this trace. posterior = infer ( simulation_wrapper , prior , method = \"SNPE\" , num_simulations = 300 , num_workers = 4 ) HBox(children=(FloatProgress(value=0.0, description='Running 300 simulations in 300 batches.', max=300.0, styl\u2026 Neural network successfully converged after 233 epochs. Note sbi can parallelize your simulator. If you experience problems with parallelization, try setting num_workers=1 and please give us an error report as a GitHub issue .","title":"Inference"},{"location":"examples/00_HH_simulator/#coming-back-to-the-observed-data","text":"As mentioned at the beginning of the tutorial, the observed data are generated by the Hodgkin-Huxley model with a set of known parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ). To illustrate how to compute the summary statistics of the observed data, let us regenerate the observed data: # true parameters and respective labels true_params = np . array ([ 50.0 , 5.0 ]) labels_params = [ r \"$g_ {Na} $\" , r \"$g_ {K} $\" ] observation_trace = run_HH_model ( true_params ) observation_summary_statistics = calculate_summary_statistics ( observation_trace ) As we already shown above, the observed voltage traces look as follows: fig = plt . figure ( figsize = ( 7 , 5 )) gs = mpl . gridspec . GridSpec ( 2 , 1 , height_ratios = [ 4 , 1 ]) ax = plt . subplot ( gs [ 0 ]) plt . plot ( observation_trace [ \"time\" ], observation_trace [ \"data\" ]) plt . ylabel ( \"voltage (mV)\" ) plt . title ( \"observed data\" ) plt . setp ( ax , xticks = [], yticks = [ - 80 , - 20 , 40 ]) ax = plt . subplot ( gs [ 1 ]) plt . plot ( observation_trace [ \"time\" ], I * A_soma * 1e3 , \"k\" , lw = 2 ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"input (nA)\" ) ax . set_xticks ([ 0 , max ( observation_trace [ \"time\" ]) / 2 , max ( observation_trace [ \"time\" ])]) ax . set_yticks ([ 0 , 1.1 * np . max ( I * A_soma * 1e3 )]) ax . yaxis . set_major_formatter ( mpl . ticker . FormatStrFormatter ( \" %.2f \" ))","title":"Coming back to the observed data"},{"location":"examples/00_HH_simulator/#analysis-of-the-posterior-given-the-observed-data","text":"After running the inference algorithm, let us inspect the inferred posterior distribution over the parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), given the observed trace. 
To do so, we first draw samples (i.e. consistent parameter sets) from the posterior: samples = posterior . sample (( 10000 ,), x = observation_summary_statistics ) HBox(children=(FloatProgress(value=0.0, description='Drawing 10000 posterior samples', max=10000.0, style=Prog\u2026 fig , axes = analysis . pairplot ( samples , limits = [[ 0.5 , 80 ], [ 1e-4 , 15.0 ]], ticks = [[ 0.5 , 80 ], [ 1e-4 , 15.0 ]], figsize = ( 5 , 5 ), points = true_params , points_offdiag = { \"markersize\" : 6 }, points_colors = \"r\" , ); As can be seen, the inferred posterior contains the ground-truth parameters (red) in a high-probability region. Now, let us sample parameters from the posterior distribution, simulate the Hodgkin-Huxley model for this parameter set and compare the simulations with the observed data: # Draw a sample from the posterior and convert to numpy for plotting. posterior_sample = posterior . sample (( 1 ,), x = observation_summary_statistics ) . numpy () HBox(children=(FloatProgress(value=0.0, description='Drawing 1 posterior samples', max=1.0, style=ProgressStyl\u2026 fig = plt . figure ( figsize = ( 7 , 5 )) # plot observation t = observation_trace [ \"time\" ] y_obs = observation_trace [ \"data\" ] plt . plot ( t , y_obs , lw = 2 , label = \"observation\" ) # simulate and plot samples x = run_HH_model ( posterior_sample ) plt . plot ( t , x [ \"data\" ], \"--\" , lw = 2 , label = \"posterior sample\" ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"voltage (mV)\" ) ax = plt . gca () handles , labels = ax . get_legend_handles_labels () ax . legend ( handles [:: - 1 ], labels [:: - 1 ], bbox_to_anchor = ( 1.3 , 1 ), loc = \"upper right\" ) ax . set_xticks ([ 0 , 60 , 120 ]) ax . set_yticks ([ - 80 , - 20 , 40 ]); As can be seen, the sample from the inferred posterior leads to simulations that closely resemble the observed data, confirming that SNPE did a good job at capturing the observed data in this simple case.","title":"Analysis of the posterior given the observed data"},{"location":"examples/00_HH_simulator/#references","text":"A. L. Hodgkin and A. F. Huxley. A quantitative description of membrane current and its application to conduction and excitation in nerve. The Journal of Physiology, 117(4):500\u2013544, 1952. M. Pospischil, M. Toledo-Rodriguez, C. Monier, Z. Piwkowska, T. Bal, Y. Fr\u00e9gnac, H. Markram, and A. Destexhe. Minimal Hodgkin-Huxley type models for different classes of cortical and thalamic neurons. Biological Cybernetics, 99(4-5), 2008.","title":"References"},{"location":"examples/01_decision_making_model/","text":"SBI for decision-making models \u00b6 In a previous tutorial , we showed how to use SBI with trial-based iid data. Such scenarios can arise, for example, in models of perceptual decision making. In addition to trial-based iid data points, these models often come with mixed data types and varying experimental conditions. Here, we show how sbi can be used to perform inference in such models with the MNLE method. Trial-based SBI with mixed data types \u00b6 In some cases, models with trial-based data additionally return data with mixed data types, e.g., continous and discrete data. For example, most computational models of decision-making have continuous reaction times and discrete choices as output. This can induce a problem when performing trial-based SBI that relies on learning a neural likelihood: It is challenging for most density estimators to handle both, continuous and discrete data at the same time. 
However, there is a recent SBI method for solving this problem, it\u2019s called Mixed Neural Likelihood Estimation (MNLE). It works just like NLE, but with mixed data types. The trick is that it learns two separate density estimators, one for the discrete part of the data, and one for the continuous part, and combines the two to obtain the final neural likelihood. Crucially, the continuous density estimator is trained conditioned on the output of the discrete one, such that statistical dependencies between the discrete and continuous data (e.g., between choices and reaction times) are modeled as well. The interested reader is referred to the original paper available here . MNLE was recently added to sbi (see this PR and also issue ) and follows the same API as SNLE . In this tutorial we will show how to apply MNLE to mixed data, and how to deal with varying experimental conditions. Toy problem for MNLE \u00b6 To illustrate MNLE we set up a toy simulator that outputs mixed data and for which we know the likelihood such we can obtain reference posterior samples via MCMC. Simulator : To simulate mixed data we do the following Sample reaction time from inverse Gamma Sample choices from Binomial Return reaction time \\(rt \\in (0, \\infty)\\) and choice index \\(c \\in \\{0, 1\\}\\) \\[ c \\sim \\text{Binomial}(\\rho) \\\\ rt \\sim \\text{InverseGamma}(\\alpha=2, \\beta) \\\\ \\] Prior : The priors of the two parameters \\(\\rho\\) and \\(\\beta\\) are independent. We define a Beta prior over the probabilty parameter of the Binomial used in the simulator and a Gamma prior over the shape-parameter of the inverse Gamma used in the simulator: \\[ p(\\beta, \\rho) = p(\\beta) \\; p(\\rho) ; \\\\ p(\\beta) = \\text{Gamma}(1, 0.5) \\\\ p(\\text{probs}) = \\text{Beta}(2, 2) \\] Because the InverseGamma and the Binomial likelihoods are well-defined we can perform MCMC on this problem and obtain reference-posterior samples. import matplotlib.pyplot as plt import torch from torch import Tensor from sbi.inference import MNLE from pyro.distributions import InverseGamma from torch.distributions import Beta , Binomial , Categorical , Gamma from sbi.utils import MultipleIndependent from sbi.utils.metrics import c2st from sbi.analysis import pairplot from sbi.inference import MCMCPosterior from sbi.utils.torchutils import atleast_2d from sbi.inference.potentials.likelihood_based_potential import ( MixedLikelihoodBasedPotential , ) from sbi.utils.conditional_density_utils import ConditionedPotential from sbi.utils import mcmc_transform from sbi.inference.potentials.base_potential import BasePotential # Toy simulator for mixed data def mixed_simulator ( theta : Tensor , concentration_scaling : float = 1.0 ): \"\"\"Returns a sample from a mixed distribution given parameters theta. Args: theta: batch of parameters, shape (batch_size, 2) concentration_scaling: scaling factor for the concentration parameter of the InverseGamma distribution, mimics an experimental condition. \"\"\" beta , ps = theta [:, : 1 ], theta [:, 1 :] choices = Binomial ( probs = ps ) . sample () rts = InverseGamma ( concentration = concentration_scaling * torch . ones_like ( beta ), rate = beta ) . sample () return torch . cat (( rts , choices ), dim = 1 ) # The potential function defines the ground truth likelihood and allows us to obtain reference posterior samples via MCMC. 
class PotentialFunctionProvider ( BasePotential ): allow_iid_x = True # type: ignore def __init__ ( self , prior , x_o , concentration_scaling = 1.0 , device = \"cpu\" ): super () . __init__ ( prior , x_o , device ) self . concentration_scaling = concentration_scaling def __call__ ( self , theta , track_gradients : bool = True ): theta = atleast_2d ( theta ) with torch . set_grad_enabled ( track_gradients ): iid_ll = self . iid_likelihood ( theta ) return iid_ll + self . prior . log_prob ( theta ) def iid_likelihood ( self , theta ): lp_choices = torch . stack ( [ Binomial ( probs = th . reshape ( 1 , - 1 )) . log_prob ( self . x_o [:, 1 :]) for th in theta [:, 1 :] ], dim = 1 , ) lp_rts = torch . stack ( [ InverseGamma ( concentration = self . concentration_scaling * torch . ones_like ( beta_i ), rate = beta_i , ) . log_prob ( self . x_o [:, : 1 ]) for beta_i in theta [:, : 1 ] ], dim = 1 , ) joint_likelihood = ( lp_choices + lp_rts ) . squeeze () assert joint_likelihood . shape == torch . Size ([ self . x_o . shape [ 0 ], theta . shape [ 0 ]]) return joint_likelihood . sum ( 0 ) # Define independent prior. prior = MultipleIndependent ( [ Gamma ( torch . tensor ([ 1.0 ]), torch . tensor ([ 0.5 ])), Beta ( torch . tensor ([ 2.0 ]), torch . tensor ([ 2.0 ])), ], validate_args = False , ) Obtain reference-posterior samples via analytical likelihood and MCMC \u00b6 torch . manual_seed ( 42 ) num_trials = 10 num_samples = 1000 theta_o = prior . sample (( 1 ,)) x_o = mixed_simulator ( theta_o . repeat ( num_trials , 1 )) mcmc_kwargs = dict ( num_chains = 20 , warmup_steps = 50 , method = \"slice_np_vectorized\" , init_strategy = \"proposal\" , ) true_posterior = MCMCPosterior ( potential_fn = PotentialFunctionProvider ( prior , x_o ), proposal = prior , theta_transform = mcmc_transform ( prior , enable_transform = True ), ** mcmc_kwargs , ) true_samples = true_posterior . sample (( num_samples ,)) /Users/janbolts/qode/sbi/sbi/utils/sbiutils.py:342: UserWarning: An x with a batch size of 10 was passed. It will be interpreted as a batch of independent and identically distributed data X={x_1, ..., x_n}, i.e., data generated based on the same underlying (unknown) parameter. The resulting posterior will be with respect to entire batch, i.e,. p(theta | X). warnings.warn( Running vectorized MCMC with 20 chains: 0%| | 0/20000 [00:00 1 , you might experience an error that a certain object from your simulator could not be pickled (an example can be found here ). This can be fixed by forcing sbi to pickle with dill instead of the default cloudpickle . To do so, adjust your code as follows: Install dill : pip install dill At the very beginning of your python script, set the pickler to dill : from joblib.externals.loky import set_loky_pickler set_loky_pickler ( \"dill\" ) Move all imports required by your simulator into the simulator: # Imports specified outside of the simulator will break dill: import torch def my_simulator ( parameters ): return torch . ones ( 1 , 10 ) # Therefore, move the imports into the simulator: def my_simulator ( parameters ): import torch return torch . ones ( 1 , 10 ) Alternative: parallelize yourself \u00b6 You can also write your own code to parallelize simulations with whatever multiprocessing framework you prefer. You can then simulate your data outside of sbi and pass the simulated data as shown in the flexible interface : Some more background \u00b6 sbi uses joblib to parallelize simulations, which in turn uses pickle or cloudpickle to serialize the simulator. 
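As a sketch of the "parallelize yourself" alternative mentioned above (the toy simulator and parameter ranges below are placeholders, not part of the FAQ), you can generate the training data outside of sbi and then append it via the flexible interface:

```python
import torch
from sbi.inference import SNPE
from sbi.utils.torchutils import BoxUniform

prior = BoxUniform(low=torch.zeros(2), high=torch.ones(2))
theta = prior.sample((1000,))

def toy_simulator(single_theta):
    # Stand-in for your own (expensive) simulator.
    return single_theta + 0.1 * torch.randn(2)

# Run the simulations yourself; replace this loop with your preferred
# parallelization framework (multiprocessing, joblib, dask, a cluster, ...).
x = torch.stack([toy_simulator(t) for t in theta])

# Pass the pre-simulated data to sbi via the flexible interface.
inference = SNPE(prior=prior)
inference.append_simulations(theta, x).train()
posterior = inference.build_posterior()
```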
Almost all simulators will be picklable with cloudpickle , but we have experienced issues e.g. with neuron simulators, see here .","title":"When using multiple workers, I get a pickling error. Can I still use multiprocessing?"},{"location":"faq/question_03/#when-using-multiple-workers-i-get-a-pickling-error-can-i-still-use-multiprocessing","text":"Yes, but you will have to make a few adjustments to your code. Some background: when using num_workers > 1 , you might experience an error that a certain object from your simulator could not be pickled (an example can be found here ). This can be fixed by forcing sbi to pickle with dill instead of the default cloudpickle . To do so, adjust your code as follows: Install dill : pip install dill At the very beginning of your python script, set the pickler to dill : from joblib.externals.loky import set_loky_pickler set_loky_pickler ( \"dill\" ) Move all imports required by your simulator into the simulator: # Imports specified outside of the simulator will break dill: import torch def my_simulator ( parameters ): return torch . ones ( 1 , 10 ) # Therefore, move the imports into the simulator: def my_simulator ( parameters ): import torch return torch . ones ( 1 , 10 )","title":"When using multiple workers, I get a pickling error. Can I still use multiprocessing?"},{"location":"faq/question_03/#alternative-parallelize-yourself","text":"You can also write your own code to parallelize simulations with whatever multiprocessing framework you prefer. You can then simulate your data outside of sbi and pass the simulated data as shown in the flexible interface :","title":"Alternative: parallelize yourself"},{"location":"faq/question_03/#some-more-background","text":"sbi uses joblib to parallelize simulations, which in turn uses pickle or cloudpickle to serialize the simulator. Almost all simulators will be picklable with cloudpickle , but we have experienced issues e.g. with neuron simulators, see here .","title":"Some more background"},{"location":"faq/question_04/","text":"Can I use the GPU for training the density estimator? \u00b6 TLDR; Yes, by passing device=\"cuda\" and by passing a prior that lives on the device name your passed. But no speed-ups for default density estimators. Yes. When creating the inference object in the flexible interface, you can pass the device as an argument, e.g., inference = SNPE ( prior , device = \"cuda\" , density_estimator = \"maf\" ) The device is set to \"cpu\" by default, and it can be set to anything, as long as it maps to an existing PyTorch CUDA device. sbi will take care of copying the net and the training data to and from the device . Note that the prior must be on the training device already, e.g., when passing device=\"cuda:0\" , make sure to pass a prior object that was created on that device, e.g., prior = torch.distributions.MultivariateNormal(loc=torch.zeros(2, device=\"cuda:0\"), covariance_matrix=torch.eye(2, device=\"cuda:0\")) . Performance \u00b6 Whether or not you reduce your training time when training on a GPU depends on the problem at hand. We provide a couple of default density estimators for SNPE , SNLE and SNRE , e.g., a mixture density network ( density_estimator=\"mdn\" ) or a Masked Autoregressive Flow ( density_estimator=\"maf\" ). For those default density estimators we do not expect a speed up. This is because the underlying neural networks are quite shallow and not tall, e.g., they do not have many parameters or matrix operations that profit a lot from being executed on the GPU. 
A speed up through training on the GPU will most likely become visible when you are using convolutional modules in your neural networks. E.g., when passing an embedding net for image processing like in this example: https://github.com/sbi-dev/sbi/blob/main/tutorials/05_embedding_net.ipynb .","title":"Can I use the GPU for training the density estimator?"},{"location":"faq/question_04/#can-i-use-the-gpu-for-training-the-density-estimator","text":"TLDR; Yes, by passing device=\"cuda\" and by passing a prior that lives on the device name your passed. But no speed-ups for default density estimators. Yes. When creating the inference object in the flexible interface, you can pass the device as an argument, e.g., inference = SNPE ( prior , device = \"cuda\" , density_estimator = \"maf\" ) The device is set to \"cpu\" by default, and it can be set to anything, as long as it maps to an existing PyTorch CUDA device. sbi will take care of copying the net and the training data to and from the device . Note that the prior must be on the training device already, e.g., when passing device=\"cuda:0\" , make sure to pass a prior object that was created on that device, e.g., prior = torch.distributions.MultivariateNormal(loc=torch.zeros(2, device=\"cuda:0\"), covariance_matrix=torch.eye(2, device=\"cuda:0\")) .","title":"Can I use the GPU for training the density estimator?"},{"location":"faq/question_04/#performance","text":"Whether or not you reduce your training time when training on a GPU depends on the problem at hand. We provide a couple of default density estimators for SNPE , SNLE and SNRE , e.g., a mixture density network ( density_estimator=\"mdn\" ) or a Masked Autoregressive Flow ( density_estimator=\"maf\" ). For those default density estimators we do not expect a speed up. This is because the underlying neural networks are quite shallow and not tall, e.g., they do not have many parameters or matrix operations that profit a lot from being executed on the GPU. A speed up through training on the GPU will most likely become visible when you are using convolutional modules in your neural networks. E.g., when passing an embedding net for image processing like in this example: https://github.com/sbi-dev/sbi/blob/main/tutorials/05_embedding_net.ipynb .","title":"Performance"},{"location":"faq/question_05/","text":"How should I save and load objects in sbi ? \u00b6 NeuralPosterior objects are picklable. import pickle # ... run inference posterior = inference . build_posterior () with open ( \"/path/to/my_posterior.pkl\" , \"wb\" ) as handle : pickle . dump ( posterior , handle ) Note: posterior objects that were saved under sbi v0.17.2 or older can not be loaded under sbi v0.18.0 or newer. Note: if you try to load a posterior that was saved under sbi v0.14.x or earlier under sbi v0.15.x until sbi v0.17.x , you have to add: import sys from sbi.utils import user_input_checks_utils sys . modules [ \"sbi.user_input.user_input_checks_utils\" ] = user_input_checks_utils to your script before loading the posterior. As of sbi v0.18.0 , NeuralInference objects are also picklable. import pickle # ... run inference posterior = inference . build_posterior () with open ( \"/path/to/my_inference.pkl\" , \"wb\" ) as handle : pickle . dump ( inference , handle ) However, saving and loading the inference object will slightly modify the object (in order to make it serializable). These modifications lead to the following two changes in behaviour: 1) Retraining from scratch is not supported, i.e. 
.train(..., retrain_from_scratch=True) does not work. 2) When the loaded object calls the .train() method, it generates a new tensorboard summary writer (instead of appending to the current one).","title":"How should I save and load objects in sbi?"},{"location":"faq/question_05/#how-should-i-save-and-load-objects-in-sbi","text":"NeuralPosterior objects are picklable. import pickle # ... run inference posterior = inference . build_posterior () with open ( \"/path/to/my_posterior.pkl\" , \"wb\" ) as handle : pickle . dump ( posterior , handle ) Note: posterior objects that were saved under sbi v0.17.2 or older can not be loaded under sbi v0.18.0 or newer. Note: if you try to load a posterior that was saved under sbi v0.14.x or earlier under sbi v0.15.x until sbi v0.17.x , you have to add: import sys from sbi.utils import user_input_checks_utils sys . modules [ \"sbi.user_input.user_input_checks_utils\" ] = user_input_checks_utils to your script before loading the posterior. As of sbi v0.18.0 , NeuralInference objects are also picklable. import pickle # ... run inference posterior = inference . build_posterior () with open ( \"/path/to/my_inference.pkl\" , \"wb\" ) as handle : pickle . dump ( inference , handle ) However, saving and loading the inference object will slightly modify the object (in order to make it serializable). These modifications lead to the following two changes in behaviour: 1) Retraining from scratch is not supported, i.e. .train(..., retrain_from_scratch=True) does not work. 2) When the loaded object calls the .train() method, it generates a new tensorboard summary writer (instead of appending to the current one).","title":"How should I save and load objects in sbi?"},{"location":"faq/question_06/","text":"Can I stop neural network training and resume it later? \u00b6 Many clusters have a time limit and sbi might exceed this limit. You can circumvent this problem by using the flexible interface . After simulations are finished, sbi trains a neural network. If this process takes too long, you can stop training and resume it later. The syntax is: inference = SNPE ( prior = prior ) inference = inference . append_simulations ( theta , x ) inference . train ( max_num_epochs = 300 ) # Pick `max_num_epochs` such that it does not exceed the runtime. with open ( \"path/to/my/inference.pkl\" , \"wb\" ) as handle : pickle . dump ( inference , handle ) # To resume training: with open ( \"path/to/my/inference.pkl\" , \"rb\" ) as handle : inference_from_disk = pickle . load ( handle ) inference_from_disk . train ( resume_training = True , max_num_epochs = 600 ) # Run epochs 301 until 600 (or stop early). posterior = inference_from_disk . build_posterior ()","title":"Can I stop neural network training and resume it later?"},{"location":"faq/question_06/#can-i-stop-neural-network-training-and-resume-it-later","text":"Many clusters have a time limit and sbi might exceed this limit. You can circumvent this problem by using the flexible interface . After simulations are finished, sbi trains a neural network. If this process takes too long, you can stop training and resume it later. The syntax is: inference = SNPE ( prior = prior ) inference = inference . append_simulations ( theta , x ) inference . train ( max_num_epochs = 300 ) # Pick `max_num_epochs` such that it does not exceed the runtime. with open ( \"path/to/my/inference.pkl\" , \"wb\" ) as handle : pickle . 
dump ( inference , handle ) # To resume training: with open ( \"path/to/my/inference.pkl\" , \"rb\" ) as handle : inference_from_disk = pickle . load ( handle ) inference_from_disk . train ( resume_training = True , max_num_epochs = 600 ) # Run epochs 301 until 600 (or stop early). posterior = inference_from_disk . build_posterior ()","title":"Can I stop neural network training and resume it later?"},{"location":"faq/question_07/","text":"Can I use a custom prior with sbi? \u00b6 sbi works with torch distributions only so we recommend to use those whenever possible. For example, when you are used to using scipy.stats distributions as priors then we recommend using the corresponding torch.distributions , most common distributions are implemented there. In case you want to use a custom prior that is not in the set of common distributions that\u2019s possible as well: You need to write a prior class that mimicks the behaviour of a torch.distributions.Distribution class. Then sbi will wrap this class to make it a fully functional torch Distribution . Essentially, the class needs two methods: .sample(sample_shape) , where sample_shape is a shape tuple, e.g., (n,) , and returns a batch of n samples, e.g., of shape (n, 2)` for a two dimenional prior. .log_prob(value) method that returns the \u201clog probs\u201d of parameters under the prior, e.g., for a batches of n parameters with shape (n, ndims) it should return a log probs array of shape (n,) . For sbi > 0.17.2 this could look like the following: class CustomUniformPrior : \"\"\"User defined numpy uniform prior. Custom prior with user-defined valid .sample and .log_prob methods. \"\"\" def __init__ ( self , lower : Tensor , upper : Tensor , return_numpy : bool = False ): self . lower = lower self . upper = upper self . dist = BoxUniform ( lower , upper ) self . return_numpy = return_numpy def sample ( self , sample_shape = torch . Size ([])): samples = self . dist . sample ( sample_shape ) return samples . numpy () if self . return_numpy else samples def log_prob ( self , values ): if self . return_numpy : values = torch . as_tensor ( values ) log_probs = self . dist . log_prob ( values ) return log_probs . numpy () if self . return_numpy else log_probs Once you have such a class you can wrap into a Distribution using the process_prior function sbi provides: from sbi.utils import process_prior custom_prior = CustomUniformPrior ( torch . zeros ( 2 ), torch . ones ( 2 )) prior , * _ = process_prior ( custom_prior ) # Keeping only the first return. # use this wrapped prior in sbi... In sbi it is sometimes required to check the support of the prior, e.g., when the prior support is bounded and one wants to reject samples from the posterior density estimator that lie outside the prior support. In torch Distributions this is handled automatically, however, when using a custom prior it is not. Thus, if your prior has bounded support (like the one above) it makes sense to pass the bounds to the wrapper function such that sbi can pass them to torch Distributions : from sbi.utils import process_prior custom_prior = CustomUniformPrior ( torch . zeros ( 2 ), torch . ones ( 2 )) prior = process_prior ( custom_prior , custom_prior_wrapper_kwargs = dict ( lower_bound = torch . zeros ( 2 ), upper_bound = torch . ones ( 2 ))) # use this wrapped prior in sbi... Note that in custom_prior_wrapper_kwargs you can pass additinal arguments for the wrapper, e.g., validate_args or arg_constraints see the Distribution documentation for more details. 
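As a quick sanity check of the shape conventions described above, the wrapped prior can be sampled and evaluated like any torch Distribution (a sketch; it assumes the CustomUniformPrior class and its imports from the snippet above are in scope):

```python
import torch
from sbi.utils import process_prior

# Wrap the custom prior (defined above) and check the expected shapes.
custom_prior = CustomUniformPrior(torch.zeros(2), torch.ones(2))
prior, *_ = process_prior(custom_prior)

theta = prior.sample((10,))        # expected shape: (10, 2)
log_probs = prior.log_prob(theta)  # expected shape: (10,)
print(theta.shape, log_probs.shape)
```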
If you are running sbi < 0.17.2 and use SNLE the code above will produce a NotImplementedError (see #581 ). In this case you need to update to a newer version of sbi or use SNPE instead.","title":"Can I use a custom prior with sbi?"},{"location":"faq/question_07/#can-i-use-a-custom-prior-with-sbi","text":"sbi works with torch distributions only so we recommend to use those whenever possible. For example, when you are used to using scipy.stats distributions as priors then we recommend using the corresponding torch.distributions , most common distributions are implemented there. In case you want to use a custom prior that is not in the set of common distributions that\u2019s possible as well: You need to write a prior class that mimicks the behaviour of a torch.distributions.Distribution class. Then sbi will wrap this class to make it a fully functional torch Distribution . Essentially, the class needs two methods: .sample(sample_shape) , where sample_shape is a shape tuple, e.g., (n,) , and returns a batch of n samples, e.g., of shape (n, 2)` for a two dimenional prior. .log_prob(value) method that returns the \u201clog probs\u201d of parameters under the prior, e.g., for a batches of n parameters with shape (n, ndims) it should return a log probs array of shape (n,) . For sbi > 0.17.2 this could look like the following: class CustomUniformPrior : \"\"\"User defined numpy uniform prior. Custom prior with user-defined valid .sample and .log_prob methods. \"\"\" def __init__ ( self , lower : Tensor , upper : Tensor , return_numpy : bool = False ): self . lower = lower self . upper = upper self . dist = BoxUniform ( lower , upper ) self . return_numpy = return_numpy def sample ( self , sample_shape = torch . Size ([])): samples = self . dist . sample ( sample_shape ) return samples . numpy () if self . return_numpy else samples def log_prob ( self , values ): if self . return_numpy : values = torch . as_tensor ( values ) log_probs = self . dist . log_prob ( values ) return log_probs . numpy () if self . return_numpy else log_probs Once you have such a class you can wrap into a Distribution using the process_prior function sbi provides: from sbi.utils import process_prior custom_prior = CustomUniformPrior ( torch . zeros ( 2 ), torch . ones ( 2 )) prior , * _ = process_prior ( custom_prior ) # Keeping only the first return. # use this wrapped prior in sbi... In sbi it is sometimes required to check the support of the prior, e.g., when the prior support is bounded and one wants to reject samples from the posterior density estimator that lie outside the prior support. In torch Distributions this is handled automatically, however, when using a custom prior it is not. Thus, if your prior has bounded support (like the one above) it makes sense to pass the bounds to the wrapper function such that sbi can pass them to torch Distributions : from sbi.utils import process_prior custom_prior = CustomUniformPrior ( torch . zeros ( 2 ), torch . ones ( 2 )) prior = process_prior ( custom_prior , custom_prior_wrapper_kwargs = dict ( lower_bound = torch . zeros ( 2 ), upper_bound = torch . ones ( 2 ))) # use this wrapped prior in sbi... Note that in custom_prior_wrapper_kwargs you can pass additinal arguments for the wrapper, e.g., validate_args or arg_constraints see the Distribution documentation for more details. If you are running sbi < 0.17.2 and use SNLE the code above will produce a NotImplementedError (see #581 ). 
In this case you need to update to a newer version of sbi or use SNPE instead.","title":"Can I use a custom prior with sbi?"},{"location":"tutorial/00_getting_started/","text":"Getting started with sbi \u00b6 Note, you can find the original version of this notebook at https://github.com/sbi-dev/sbi/blob/main/tutorials/00_getting_started.ipynb in the sbi repository. import torch from sbi import utils as utils from sbi import analysis as analysis from sbi.inference.base import infer Running the inference procedure \u00b6 sbi provides a simple interface to run state-of-the-art algorithms for simulation-based inference. For inference, you need to provide two ingredients: 1) a prior distribution that allows to sample parameter sets. 2) a simulator that takes parameter sets and produces simulation outputs. For example, we can have a 3-dimensional parameter space with a uniform prior between [-1,1] and a simple simulator that for the sake of example adds 1.0 and some Gaussian noise to the parameter set: num_dim = 3 prior = utils . BoxUniform ( low =- 2 * torch . ones ( num_dim ), high = 2 * torch . ones ( num_dim )) def simulator ( parameter_set ): return 1.0 + parameter_set + torch . randn ( parameter_set . shape ) * 0.1 sbi can then run inference: posterior = infer ( simulator , prior , method = \"SNPE\" , num_simulations = 1000 ) Running 1000 simulations.: 0%| | 0/1000 [00:001 , the posterior is no longer amortized: it will give good results when sampled around x=observation , but possibly bad results for other x . Once we have obtained the posterior, we can .sample() , .log_prob() , or .pairplot() in the same way as for the simple interface. posterior_samples = posterior . sample (( 10000 ,), x = x_o ) # plot posterior samples _ = analysis . pairplot ( posterior_samples , limits = [[ - 2 , 2 ], [ - 2 , 2 ], [ - 2 , 2 ]], figsize = ( 5 , 5 ) ) Drawing 10000 posterior samples: 0%| | 0/10000 [00:001 , the posterior is no longer amortized: it will give good results when sampled around x=observation , but possibly bad results for other x . Once we have obtained the posterior, we can .sample() , .log_prob() , or .pairplot() in the same way as for the simple interface. posterior_samples = posterior . sample (( 10000 ,), x = x_o ) # plot posterior samples _ = analysis . pairplot ( posterior_samples , limits = [[ - 2 , 2 ], [ - 2 , 2 ], [ - 2 , 2 ]], figsize = ( 5 , 5 ) ) Drawing 10000 posterior samples: 0%| | 0/10000 [00:00 The simulator model \u00b6 The simulator model that we consider has two parameters: \\(r\\) and \\(\\theta\\) . On each run, it generates 100 two-dimensional points centered around \\((r \\cos(\\theta), r \\sin(\\theta))\\) and perturbed by a Gaussian noise with variance 0.01. Instead of simply outputting the \\((x,y)\\) coordinates of each data point, the model generates a grayscale image of the scattered points with dimensions 32 by 32. This image is further perturbed by an uniform noise with values betweeen 0 and 0.2. The code below defines such model. def simulator_model ( parameter , return_points = False ): \"\"\"Simulator model with two-dimensional input parameter and 1024-dimensional output This simulator serves as a basic example for using a neural net for learning summary features. It has only two input parameters but generates high-dimensional output vectors. 
The data is generated as follows: (-) Input: parameter = [r, theta] (1) Generate 100 two-dimensional points centered around (r cos(theta),r sin(theta)) and perturbed by a Gaussian noise with variance 0.01 (2) Create a grayscale image I of the scattered points with dimensions 32 by 32 (3) Perturb I with an uniform noise with values betweeen 0 and 0.2 (-) Output: I Parameters ---------- parameter : array-like, shape (2) The two input parameters of the model, ordered as [r, theta] return_points : bool (default: False) Whether the simulator should return the coordinates of the simulated data points as well Returns ------- I: torch tensor, shape (1, 1024) Output flattened image (optional) points: array-like, shape (100, 2) Coordinates of the 2D simulated data points \"\"\" r = parameter [ 0 ] theta = parameter [ 1 ] sigma_points = 0.10 npoints = 100 points = [] for _ in range ( npoints ): x = r * torch . cos ( theta ) + sigma_points * torch . randn ( 1 ) y = r * torch . sin ( theta ) + sigma_points * torch . randn ( 1 ) points . append ([ x , y ]) points = torch . as_tensor ( points ) nx = 32 ny = 32 sigma_image = 0.20 I = torch . zeros ( nx , ny ) for point in points : pi = int (( point [ 0 ] - ( - 1 )) / (( + 1 ) - ( - 1 )) * nx ) pj = int (( point [ 1 ] - ( - 1 )) / (( + 1 ) - ( - 1 )) * ny ) if ( pi < nx ) and ( pj < ny ): I [ pi , pj ] = 1 I = I + sigma_image * torch . rand ( nx , ny ) I = I . T I = I . reshape ( 1 , - 1 ) if return_points : return I , points else : return I The figure below shows an example of the output of the simulator when \\(r = 0.70\\) and \\(\\theta = \\pi/4\\) # simulate samples true_parameter = torch . tensor ([ 0.70 , torch . pi / 4 ]) x_observed , x_points = simulator_model ( true_parameter , return_points = True ) # plot the observation fig , ax = plt . subplots ( facecolor = \"white\" , figsize = ( 11.15 , 5.61 ), ncols = 2 , constrained_layout = True ) circle = plt . Circle (( 0 , 0 ), 1.0 , color = \"k\" , ls = \"--\" , lw = 0.8 , fill = False ) ax [ 0 ] . add_artist ( circle ) ax [ 0 ] . scatter ( x_points [:, 0 ], x_points [:, 1 ], s = 20 ) ax [ 0 ] . set_xlabel ( \"x\" ) ax [ 0 ] . set_ylabel ( \"y\" ) ax [ 0 ] . set_xlim ( - 1 , + 1 ) ax [ 0 ] . set_xticks ([ - 1 , 0.0 , + 1.0 ]) ax [ 0 ] . set_ylim ( - 1 , + 1 ) ax [ 0 ] . set_yticks ([ - 1 , 0.0 , + 1.0 ]) ax [ 0 ] . set_title ( r \"original simulated points with $r = 0.70$ and $\\theta = \\pi/4$\" ) ax [ 1 ] . imshow ( x_observed . view ( 32 , 32 ), origin = \"lower\" , cmap = \"gray\" ) ax [ 1 ] . set_xticks ([]) ax [ 1 ] . set_yticks ([]) ax [ 1 ] . set_title ( \"noisy observed data (gray image with 32 x 32 pixels)\" ) Text(0.5, 1.0, 'noisy observed data (gray image with 32 x 32 pixels)') Defining an embedding_net \u00b6 An inference procedure applied to the output data from this simulator model determines the posterior distribution of \\(r\\) and \\(\\theta\\) given an observation of \\(x\\) , which lives in a 1024 dimensional space (32 x 32 = 1024). To avoid working directly on these high-dimensional vectors, one can use a convolutional neural network (CNN) that takes the 32x32 images as input and encodes them into 8-dimensional feature vectors. This CNN is trained along with the neural density estimator of the inference procedure and serves as an automatic summary statistics extractor. We define and instantiate the CNN as follows: class SummaryNet ( nn . Module ): def __init__ ( self ): super () . __init__ () # 2D convolutional layer self . conv1 = nn . 
Conv2d ( in_channels = 1 , out_channels = 6 , kernel_size = 5 , padding = 2 ) # Maxpool layer that reduces 32x32 image to 4x4 self . pool = nn . MaxPool2d ( kernel_size = 8 , stride = 8 ) # Fully connected layer taking as input the 6 flattened output arrays from the maxpooling layer self . fc = nn . Linear ( in_features = 6 * 4 * 4 , out_features = 8 ) def forward ( self , x ): x = x . view ( - 1 , 1 , 32 , 32 ) x = self . pool ( F . relu ( self . conv1 ( x ))) x = x . view ( - 1 , 6 * 4 * 4 ) x = F . relu ( self . fc ( x )) return x embedding_net = SummaryNet () The inference procedure \u00b6 With the embedding_net defined and instantiated, we can follow the usual workflow of an inference procedure in sbi . The embedding_net object appears as an input argument when instantiating the neural density estimator with utils.posterior_nn . # set prior distribution for the parameters prior = utils . BoxUniform ( low = torch . tensor ([ 0.0 , 0.0 ]), high = torch . tensor ([ 1.0 , 2 * torch . pi ]) ) # make a SBI-wrapper on the simulator object for compatibility simulator_wrapper , prior = prepare_for_sbi ( simulator_model , prior ) # instantiate the neural density estimator neural_posterior = utils . posterior_nn ( model = \"maf\" , embedding_net = embedding_net , hidden_features = 10 , num_transforms = 2 ) # setup the inference procedure with the SNPE-C procedure inference = SNPE ( prior = prior , density_estimator = neural_posterior ) # run the inference procedure on one round and 10000 simulated data points theta , x = simulate_for_sbi ( simulator_wrapper , prior , num_simulations = 10000 ) Running 10000 simulations.: 0%| | 0/10000 [00:00","title":"Learning summary statistics with a neural net"},{"location":"tutorial/05_embedding_net/#the-simulator-model","text":"The simulator model that we consider has two parameters: \\(r\\) and \\(\\theta\\) . On each run, it generates 100 two-dimensional points centered around \\((r \\cos(\\theta), r \\sin(\\theta))\\) and perturbed by a Gaussian noise with variance 0.01. Instead of simply outputting the \\((x,y)\\) coordinates of each data point, the model generates a grayscale image of the scattered points with dimensions 32 by 32. This image is further perturbed by an uniform noise with values betweeen 0 and 0.2. The code below defines such model. def simulator_model ( parameter , return_points = False ): \"\"\"Simulator model with two-dimensional input parameter and 1024-dimensional output This simulator serves as a basic example for using a neural net for learning summary features. It has only two input parameters but generates high-dimensional output vectors. 
The data is generated as follows: (-) Input: parameter = [r, theta] (1) Generate 100 two-dimensional points centered around (r cos(theta),r sin(theta)) and perturbed by a Gaussian noise with variance 0.01 (2) Create a grayscale image I of the scattered points with dimensions 32 by 32 (3) Perturb I with an uniform noise with values betweeen 0 and 0.2 (-) Output: I Parameters ---------- parameter : array-like, shape (2) The two input parameters of the model, ordered as [r, theta] return_points : bool (default: False) Whether the simulator should return the coordinates of the simulated data points as well Returns ------- I: torch tensor, shape (1, 1024) Output flattened image (optional) points: array-like, shape (100, 2) Coordinates of the 2D simulated data points \"\"\" r = parameter [ 0 ] theta = parameter [ 1 ] sigma_points = 0.10 npoints = 100 points = [] for _ in range ( npoints ): x = r * torch . cos ( theta ) + sigma_points * torch . randn ( 1 ) y = r * torch . sin ( theta ) + sigma_points * torch . randn ( 1 ) points . append ([ x , y ]) points = torch . as_tensor ( points ) nx = 32 ny = 32 sigma_image = 0.20 I = torch . zeros ( nx , ny ) for point in points : pi = int (( point [ 0 ] - ( - 1 )) / (( + 1 ) - ( - 1 )) * nx ) pj = int (( point [ 1 ] - ( - 1 )) / (( + 1 ) - ( - 1 )) * ny ) if ( pi < nx ) and ( pj < ny ): I [ pi , pj ] = 1 I = I + sigma_image * torch . rand ( nx , ny ) I = I . T I = I . reshape ( 1 , - 1 ) if return_points : return I , points else : return I The figure below shows an example of the output of the simulator when \\(r = 0.70\\) and \\(\\theta = \\pi/4\\) # simulate samples true_parameter = torch . tensor ([ 0.70 , torch . pi / 4 ]) x_observed , x_points = simulator_model ( true_parameter , return_points = True ) # plot the observation fig , ax = plt . subplots ( facecolor = \"white\" , figsize = ( 11.15 , 5.61 ), ncols = 2 , constrained_layout = True ) circle = plt . Circle (( 0 , 0 ), 1.0 , color = \"k\" , ls = \"--\" , lw = 0.8 , fill = False ) ax [ 0 ] . add_artist ( circle ) ax [ 0 ] . scatter ( x_points [:, 0 ], x_points [:, 1 ], s = 20 ) ax [ 0 ] . set_xlabel ( \"x\" ) ax [ 0 ] . set_ylabel ( \"y\" ) ax [ 0 ] . set_xlim ( - 1 , + 1 ) ax [ 0 ] . set_xticks ([ - 1 , 0.0 , + 1.0 ]) ax [ 0 ] . set_ylim ( - 1 , + 1 ) ax [ 0 ] . set_yticks ([ - 1 , 0.0 , + 1.0 ]) ax [ 0 ] . set_title ( r \"original simulated points with $r = 0.70$ and $\\theta = \\pi/4$\" ) ax [ 1 ] . imshow ( x_observed . view ( 32 , 32 ), origin = \"lower\" , cmap = \"gray\" ) ax [ 1 ] . set_xticks ([]) ax [ 1 ] . set_yticks ([]) ax [ 1 ] . set_title ( \"noisy observed data (gray image with 32 x 32 pixels)\" ) Text(0.5, 1.0, 'noisy observed data (gray image with 32 x 32 pixels)')","title":"The simulator model"},{"location":"tutorial/05_embedding_net/#defining-an-embedding_net","text":"An inference procedure applied to the output data from this simulator model determines the posterior distribution of \\(r\\) and \\(\\theta\\) given an observation of \\(x\\) , which lives in a 1024 dimensional space (32 x 32 = 1024). To avoid working directly on these high-dimensional vectors, one can use a convolutional neural network (CNN) that takes the 32x32 images as input and encodes them into 8-dimensional feature vectors. This CNN is trained along with the neural density estimator of the inference procedure and serves as an automatic summary statistics extractor. We define and instantiate the CNN as follows: class SummaryNet ( nn . Module ): def __init__ ( self ): super () . 
__init__ () # 2D convolutional layer self . conv1 = nn . Conv2d ( in_channels = 1 , out_channels = 6 , kernel_size = 5 , padding = 2 ) # Maxpool layer that reduces 32x32 image to 4x4 self . pool = nn . MaxPool2d ( kernel_size = 8 , stride = 8 ) # Fully connected layer taking as input the 6 flattened output arrays from the maxpooling layer self . fc = nn . Linear ( in_features = 6 * 4 * 4 , out_features = 8 ) def forward ( self , x ): x = x . view ( - 1 , 1 , 32 , 32 ) x = self . pool ( F . relu ( self . conv1 ( x ))) x = x . view ( - 1 , 6 * 4 * 4 ) x = F . relu ( self . fc ( x )) return x embedding_net = SummaryNet ()","title":"Defining an embedding_net"},{"location":"tutorial/05_embedding_net/#the-inference-procedure","text":"With the embedding_net defined and instantiated, we can follow the usual workflow of an inference procedure in sbi . The embedding_net object appears as an input argument when instantiating the neural density estimator with utils.posterior_nn . # set prior distribution for the parameters prior = utils . BoxUniform ( low = torch . tensor ([ 0.0 , 0.0 ]), high = torch . tensor ([ 1.0 , 2 * torch . pi ]) ) # make a SBI-wrapper on the simulator object for compatibility simulator_wrapper , prior = prepare_for_sbi ( simulator_model , prior ) # instantiate the neural density estimator neural_posterior = utils . posterior_nn ( model = \"maf\" , embedding_net = embedding_net , hidden_features = 10 , num_transforms = 2 ) # setup the inference procedure with the SNPE-C procedure inference = SNPE ( prior = prior , density_estimator = neural_posterior ) # run the inference procedure on one round and 10000 simulated data points theta , x = simulate_for_sbi ( simulator_wrapper , prior , num_simulations = 10000 ) Running 10000 simulations.: 0%| | 0/10000 [00:00] 1.3 Summary statistics \u00b6 We will compare two methods for defining summary statistics. One method uses three summary statistics which are function evaluations at three points in time. The other method uses a single summary statistic: the mean squared error between the observed and the simulated trace. In the second case, one then tries to obtain the posterior \\(p(\\theta | 0)\\) , i.e. the error being zero. These two methods are implemented below: \\(\\textbf{get_3_values()}\\) returns 3 function evaluations at \\(x=-0.5, x=0\\) and \\(x=0.75\\) . \\(\\textbf{get_MSE()}\\) returns the mean squared error between true and a quadratic function corresponding to a prior distributions sample. def get_3_values ( theta , seed = None ): \"\"\" Return 3 'y' values corresponding to x=-0.5,0,0.75 as summary statistic vector \"\"\" return np . array ( [ eval ( theta , - 0.5 , seed = seed ), eval ( theta , 0 , seed = seed ), eval ( theta , 0.75 , seed = seed ), ] ) . T def get_MSE ( theta , theta_o , seed = None ): \"\"\" Return the mean-squared error (MSE) i.e. Euclidean distance from the observation function \"\"\" _ , y = create_t_x ( theta_o , seed = seed ) # truth _ , y_ = create_t_x ( theta , seed = seed ) # simulations return np . mean ( np . square ( y_ - y ), axis = 0 , keepdims = True ) . T # MSE Let\u2019s try a couple of samples from our prior and see their summary statistics. Notice that these indeed change in small amounts every time you rerun it due to the noise, except if you set the seed. 1.4 Simulating data \u00b6 Let us see various plots of prior samples and their summary statistics versus the truth, i.e. our artificial observation. t , x_truth = create_t_x ( theta_o ) plt . 
plot ( t , x_truth , \"k\" , zorder = 1 , label = \"truth\" ) n_samples = 100 theta = prior . sample (( n_samples ,)) t , x = create_t_x ( theta . numpy ()) plt . plot ( t , x , \"grey\" , zorder = 0 ) plt . legend () In summary, we defined reasonable summary statistics and, a priori, there is no apparent reason why one method should be better than the other. When we do inference, we\u2019d like our posterior to focus around parameter samples that have their simulated MSE very close to 0 (i.e. the truth MSE summary statistic) or their 3 extracted \\((t, x)\\) coordinates to be the truthful ones. 1.5 Inference \u00b6 1.5.1 Using the MSE \u00b6 Let\u2019s see if we can use the MSE to recover the true observation parameters \\(\\theta_o=(a_0,b_0,c_0)\\) . theta = prior . sample (( 1000 ,)) x = get_MSE ( theta . numpy (), theta_o ) theta = torch . as_tensor ( theta , dtype = torch . float32 ) x = torch . as_tensor ( x , dtype = torch . float32 ) inference = SNPE ( prior ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () Neural network successfully converged after 181 epochs. Now that we\u2019ve built the posterior, we can see how likely it finds certain parameters given that we tell it that we\u2019ve observed a certain summary statistic (in this case the MSE). We can then sample from it. x_o = torch . as_tensor ( [ [ 0.0 , ] ] ) theta_p = posterior . sample (( 10000 ,), x = x_o ) The functions are a bit closer to the observation than prior samples, but many posterior samples generate activity that is very far off from the observation. We would expect sbi to do better on such a simple example. So what\u2019s going on? Do we need more simulations? Feel free to try, but below we will show that one can use the same number of simulation samples with different summary statistics and do much better. 1.5.2 Using 3 coordinates as summary statistics \u00b6 x = get_3_values ( theta . numpy ()) x = torch . as_tensor ( x , dtype = torch . float32 ) inference = SNPE ( prior ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () Neural network successfully converged after 127 epochs. The observation is now given by the values of the observed trace at three different coordinates: x_o = torch . as_tensor ( get_3_values ( theta_o ), dtype = float ) theta_p = posterior . sample (( 10000 ,), x = x_o ) fig , axes = pairplot ( theta_p , limits = list ( zip ( prior_min , prior_max )), ticks = list ( zip ( prior_min , prior_max )), figsize = ( 7 , 7 ), labels = [ \"a\" , \"b\" , \"c\" ], points_offdiag = { \"markersize\" : 6 }, points_colors = \"r\" , points = theta_o , ); Ok this definitely seems to work! The posterior correctly focuses on the true parameters with greater confidence. You can experiment yourself to see how this improves further with more training samples, or try to find out how many simulations you actually need to still obtain a satisfying-looking posterior and posterior-predictive simulations close to the observation. So, what\u2019s up with the MSE? Why does it not seem so informative to constrain the posterior? In 1.6, we\u2019ll see both the power and pitfalls of summary statistics.
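Before moving on, a quick visual comparison helps: simulate traces from the posterior samples and overlay them on the observation (a sketch; it reuses create_t_x , theta_o and the posterior samples theta_p obtained above):

```python
import matplotlib.pyplot as plt

# Posterior-predictive traces for 100 posterior samples vs. the observation.
t, x_truth = create_t_x(theta_o)
t, x_pred = create_t_x(theta_p[:100].numpy())
plt.plot(t, x_pred, "grey", zorder=0)
plt.plot(t, x_truth, "k", zorder=1, label="truth")
plt.legend()
plt.show()
```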
1.6 Prior simulations\u2019 summary statistics vs observed summary statistics \u00b6 Let\u2019s try to understand this\u2026Let\u2019s look at a histogram of the four summary statistics we\u2019ve experimented with, and see how they compare to our observed truth summary statistic vector: stats = np . concatenate ( ( get_3_values ( theta . numpy ()), get_MSE ( theta . numpy (), theta_o )), axis = 1 ) x_o = np . concatenate (( get_3_values ( theta_o ), np . asarray ([[ 0.0 ]])), axis = 1 ) features = [ \"y @ x=-0.5\" , \"y @ x=0\" , \"y @ x=0.7\" , \"MSE\" ] fig , axes = plt . subplots ( 1 , 4 , figsize = ( 10 , 3 )) xlabelfontsize = 10 for i , ax in enumerate ( axes . reshape ( - 1 )): ax . hist ( stats [:, i ], color = [ \"grey\" ], alpha = 0.5 , bins = 30 , density = True , histtype = \"stepfilled\" , label = [ \"simulations\" ], ) ax . axvline ( x_o [:, i ], label = \"observation\" ) ax . set_xlabel ( features [ i ], fontsize = xlabelfontsize ) if i == 3 : ax . legend () plt . tight_layout () We see that for the coordinates (three plots on the left), simulations cover the observation. That is: it covers it from the left and right side in each case. For the MSE, simulations never truly reach the observation \\(0.0\\) . For the trained neural network, it is strongly preferable if the simulations cover the observation. In that case, the neural network can interpolate between simulated data. Contrary to that, for the MSE, the neural network has to extrapolate : it never observes a simulation that is to the left of the observation and has to extrapolate to the region of MSE= \\(0.0\\) . This seems like a technical point but, as we saw above, it makes a huge difference in performance. 1.7 Explicit recommendations \u00b6 We give some explicit recommendation when using summary statistics Visualize the histogram of each summary statistic and plot the value of the observation. If, for some summary statistics, the observation is not covered (or is at the very border, e.g. the MSE above), the trained neural network will struggle. Do not use an \u201cerror\u201d as summary statistic. This is common in optimization (e.g. genetic algorithms), but it often leads to trouble in sbi due to the reason above. Only use summary statistics that are necessary. The less summary statistics you use, the less can go wrong with them. Of course, you have to ensure that the summary statistics describe the raw data sufficiently well.","title":"Crafting summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#crafting-summary-statistics","text":"Many simulators produce outputs that are high-dimesional. For example, a simulator might generate a time series or an image. In a previous tutorial , we discussed how a neural networks can be used to learn summary statistics from such data. In this notebook, we will instead focus on hand-crafting summary statistics. We demonstrate that the choice of summary statistics can be crucial for the performance of the inference algorithm. import numpy as np import torch import matplotlib.pyplot as plt import matplotlib as mpl # sbi import sbi.utils as utils from sbi.inference.base import infer from sbi.inference import SNPE , prepare_for_sbi , simulate_for_sbi from sbi.utils.get_nn_models import posterior_nn from sbi.analysis import pairplot # remove top and right axis from plots mpl . rcParams [ \"axes.spines.right\" ] = False mpl . rcParams [ \"axes.spines.top\" ] = False This notebook is not intended to provide a one-fits-all approach. 
In fact it argues against this: it argues for the user to carefully construct their summary statistics to (i) further help the user understand his observed data, (ii) help them understand exactly what they want the model to recover from the observation and (iii) help the inference framework itself.","title":"Crafting summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#example-1-the-quadratic-function","text":"Assume we have a simulator that is given by a quadratic function: \\(x(t) = a\\cdot t^2 + b\\cdot t + c + \\epsilon\\) , where \\(\\epsilon\\) is Gaussian observation noise and \\(\\theta = \\{a, b, c\\}\\) are the parameters. Given an observed quadratic function \\(x_o\\) , we would like to recover the posterior over parameters \\(a_o\\) , \\(b_o\\) and \\(c_o\\) .","title":"Example 1: The quadratic function"},{"location":"tutorial/10_crafting_summary_statistics/#11-prior-over-parameters","text":"First we define a prior distribution over parameters \\(a\\) , \\(b\\) and \\(c\\) . Here, we use a uniform prior for \\(a\\) , \\(b\\) and \\(c\\) to go from \\(-1\\) to \\(1\\) . prior_min = [ - 1 , - 1 , - 1 ] prior_max = [ 1 , 1 , 1 ] prior = utils . torchutils . BoxUniform ( low = torch . as_tensor ( prior_min ), high = torch . as_tensor ( prior_max ) )","title":"1.1 Prior over parameters"},{"location":"tutorial/10_crafting_summary_statistics/#12-simulator","text":"Defining some helper functions first: def create_t_x ( theta , seed = None ): \"\"\"Return an t, x array for plotting based on params\"\"\" if theta . ndim == 1 : theta = theta [ np . newaxis , :] if seed is not None : rng = np . random . RandomState ( seed ) else : rng = np . random . RandomState () t = np . linspace ( - 1 , 1 , 200 ) ts = np . repeat ( t [:, np . newaxis ], theta . shape [ 0 ], axis = 1 ) x = ( theta [:, 0 ] * ts ** 2 + theta [:, 1 ] * ts + theta [:, 2 ] + 0.01 * rng . randn ( ts . shape [ 0 ], theta . shape [ 0 ]) ) return t , x def eval ( theta , t , seed = None ): \"\"\"Evaluate the quadratic function at `t`\"\"\" if theta . ndim == 1 : theta = theta [ np . newaxis , :] if seed is not None : rng = np . random . RandomState ( seed ) else : rng = np . random . RandomState () return theta [:, 0 ] * t ** 2 + theta [:, 1 ] * t + theta [:, 2 ] + 0.01 * rng . randn ( 1 ) In this example, we generate the observation \\(x_o\\) from parameters \\(\\theta_o=(a_o, b_o, c_o)=(0.3, -0.2, -0.1)\\) . The observation as follows. theta_o = np . array ([ 0.3 , - 0.2 , - 0.1 ]) t , x = create_t_x ( theta_o ) plt . plot ( t , x , \"k\" ) []","title":"1.2 Simulator"},{"location":"tutorial/10_crafting_summary_statistics/#13-summary-statistics","text":"We will compare two methods for defining summary statistics. One method uses three summary statistics which are function evaluations at three points in time. The other method uses a single summary statistic: the mean squared error between the observed and the simulated trace. In the second case, one then tries to obtain the posterior \\(p(\\theta | 0)\\) , i.e. the error being zero. These two methods are implemented below: \\(\\textbf{get_3_values()}\\) returns 3 function evaluations at \\(x=-0.5, x=0\\) and \\(x=0.75\\) . \\(\\textbf{get_MSE()}\\) returns the mean squared error between true and a quadratic function corresponding to a prior distributions sample. def get_3_values ( theta , seed = None ): \"\"\" Return 3 'y' values corresponding to x=-0.5,0,0.75 as summary statistic vector \"\"\" return np . 
array ( [ eval ( theta , - 0.5 , seed = seed ), eval ( theta , 0 , seed = seed ), eval ( theta , 0.75 , seed = seed ), ] ) . T def get_MSE ( theta , theta_o , seed = None ): \"\"\" Return the mean-squared error (MSE) i.e. Euclidean distance from the observation function \"\"\" _ , y = create_t_x ( theta_o , seed = seed ) # truth _ , y_ = create_t_x ( theta , seed = seed ) # simulations return np . mean ( np . square ( y_ - y ), axis = 0 , keepdims = True ) . T # MSE Let\u2019s try a couple of samples from our prior and see their summary statistics. Notice that these indeed change in small amounts every time you rerun it due to the noise, except if you set the seed.","title":"1.3 Summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#14-simulating-data","text":"Let us see various plots of prior samples and their summary statistics versus the truth, i.e. our artificial observation. t , x_truth = create_t_x ( theta_o ) plt . plot ( t , x_truth , \"k\" , zorder = 1 , label = \"truth\" ) n_samples = 100 theta = prior . sample (( n_samples ,)) t , x = create_t_x ( theta . numpy ()) plt . plot ( t , x , \"grey\" , zorder = 0 ) plt . legend () In summary, we defined reasonable summary statistics and, a priori, there might be an appararent reason why one method would be better than another. When we do inference, we\u2019d like our posterior to focus around parameter samples that have their simulated MSE very close to 0 (i.e. the truth MSE summary statistic) or their 3 extracted \\((t, x)\\) coordinates to be the truthful ones.","title":"1.4 Simulating data"},{"location":"tutorial/10_crafting_summary_statistics/#15-inference","text":"","title":"1.5 Inference"},{"location":"tutorial/10_crafting_summary_statistics/#151-using-the-mse","text":"Let\u2019s see if we can use the MSE to recover the true observation parameters \\(\\theta_o=(a_0,b_0,c_0)\\) . theta = prior . sample (( 1000 ,)) x = get_MSE ( theta . numpy (), theta_o ) theta = torch . as_tensor ( theta , dtype = torch . float32 ) x = torch . as_tensor ( x , dtype = torch . float32 ) inference = SNPE ( prior ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () Neural network successfully converged after 181 epochs. Now that we\u2019ve build the posterior as such, we can see how likely it finds certain parameters given that we tell it that we\u2019ve observed a certain summary statistic (in this case the MSE). We can then sample from it. x_o = torch . as_tensor ( [ [ 0.0 , ] ] ) theta_p = posterior . sample (( 10000 ,), x = x_o ) Drawing 10000 posterior samples: 0%| | 0/10000 [00:00 The functions are a bit closer to the observation than prior samples, but many posterior samples generate activity that is very far off from the observation. We would expect sbi do better on such a simple example. So what\u2019s going on? Do we need more simulations? Feel free to try, but below we will show that one can use the same number of simulation samples with different summary statistics and do much better.","title":"1.5.1 Using the MSE"},{"location":"tutorial/10_crafting_summary_statistics/#152-using-3-coordinates-as-summary-statistics","text":"x = get_3_values ( theta . numpy ()) x = torch . as_tensor ( x , dtype = torch . float32 ) inference = SNPE ( prior ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () Neural network successfully converged after 127 epochs. 
The observation is now given by the values of the observed trace at three different coordinates: x_o = torch . as_tensor ( get_3_values ( theta_o ), dtype = float ) theta_p = posterior . sample (( 10000 ,), x = x_o ) fig , axes = pairplot ( theta_p , limits = list ( zip ( prior_min , prior_max )), ticks = list ( zip ( prior_min , prior_max )), figsize = ( 7 , 7 ), labels = [ \"a\" , \"b\" , \"c\" ], points_offdiag = { \"markersize\" : 6 }, points_colors = \"r\" , points = theta_o , ); Drawing 10000 posterior samples: 0%| | 0/10000 [00:00 Ok this definitely seems to work! The posterior correctly focuses on the true parameters with greater confidence. You can experiment yourself how this improves further with more training samples or you could try to see how many you\u2019d exactly need to keep having a satisfyingly looking posterior and high posterior sample simulations. So, what\u2019s up with the MSE? Why does it not seem so informative to constrain the posterior? In 1.6, we\u2019ll see both the power and pitfalls of summary statistics.","title":"1.5.2 Using 3 coordinates as summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#16-prior-simulations-summary-statistics-vs-observed-summary-statistics","text":"Let\u2019s try to understand this\u2026Let\u2019s look at a histogram of the four summary statistics we\u2019ve experimented with, and see how they compare to our observed truth summary statistic vector: stats = np . concatenate ( ( get_3_values ( theta . numpy ()), get_MSE ( theta . numpy (), theta_o )), axis = 1 ) x_o = np . concatenate (( get_3_values ( theta_o ), np . asarray ([[ 0.0 ]])), axis = 1 ) features = [ \"y @ x=-0.5\" , \"y @ x=0\" , \"y @ x=0.7\" , \"MSE\" ] fig , axes = plt . subplots ( 1 , 4 , figsize = ( 10 , 3 )) xlabelfontsize = 10 for i , ax in enumerate ( axes . reshape ( - 1 )): ax . hist ( stats [:, i ], color = [ \"grey\" ], alpha = 0.5 , bins = 30 , density = True , histtype = \"stepfilled\" , label = [ \"simulations\" ], ) ax . axvline ( x_o [:, i ], label = \"observation\" ) ax . set_xlabel ( features [ i ], fontsize = xlabelfontsize ) if i == 3 : ax . legend () plt . tight_layout () We see that for the coordinates (three plots on the left), simulations cover the observation. That is: it covers it from the left and right side in each case. For the MSE, simulations never truly reach the observation \\(0.0\\) . For the trained neural network, it is strongly preferable if the simulations cover the observation. In that case, the neural network can interpolate between simulated data. Contrary to that, for the MSE, the neural network has to extrapolate : it never observes a simulation that is to the left of the observation and has to extrapolate to the region of MSE= \\(0.0\\) . This seems like a technical point but, as we saw above, it makes a huge difference in performance.","title":"1.6 Prior simulations' summary statistics vs observed summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#17-explicit-recommendations","text":"We give some explicit recommendation when using summary statistics Visualize the histogram of each summary statistic and plot the value of the observation. If, for some summary statistics, the observation is not covered (or is at the very border, e.g. the MSE above), the trained neural network will struggle. Do not use an \u201cerror\u201d as summary statistic. This is common in optimization (e.g. genetic algorithms), but it often leads to trouble in sbi due to the reason above. 
Only use summary statistics that are necessary. The less summary statistics you use, the less can go wrong with them. Of course, you have to ensure that the summary statistics describe the raw data sufficiently well.","title":"1.7 Explicit recommendations"},{"location":"tutorial/11_sampler_interface/","text":"The sampler interface \u00b6 Note: this tutorial requires that the user is already familiar with the flexible interface . sbi implements three methods: SNPE, SNLE, and SNRE. When using SNPE, the trained neural network directly approximates the posterior. Thus, sampling from the posterior can be done by sampling from the trained neural network. The neural networks trained in SNLE and SNRE approximate the likelihood(-ratio). Thus, in order to draw samples from the posterior, one has to perform additional sampling steps, e.g. Markov-chain Monte-Carlo (MCMC). In sbi , the implemented samplers are: Markov-chain Monte-Carlo (MCMC) Rejection sampling Variational inference (VI) When using the flexible interface, the sampler as well as its attributes can be set with sample_with=\"mcmc\" , mcmc_method=\"slice_np\" , and mcmc_parameters={} . However, for full flexibility in customizing the sampler, we recommend using the sampler interface . This interface is described here. Further details can be found here . Main syntax for SNLE \u00b6 import torch from sbi.inference import SNLE from sbi.inference import likelihood_estimator_based_potential , MCMCPosterior # dummy Gaussian simulator for demonstration num_dim = 2 prior = torch . distributions . MultivariateNormal ( torch . zeros ( num_dim ), torch . eye ( num_dim )) theta = prior . sample (( 1000 ,)) x = theta + torch . randn (( 1000 , num_dim )) x_o = torch . randn (( 1 , num_dim )) inference = SNLE ( show_progress_bars = False ) likelihood_estimator = inference . append_simulations ( theta , x ) . train () potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) Neural network successfully converged after 52 epochs. Further explanation \u00b6 The first lines are the same as for the flexible interface: inference = SNLE () likelihood_estimator = inference . append_simulations ( theta , x ) . train () Neural network successfully converged after 33 epochs. Next, we obtain the potential function. A potential function is a function of the parameter \\(f(\\theta)\\) . The posterior is proportional to the product of likelihood and prior: \\(p(\\theta | x_o) \\propto p(x_o | \\theta)p(\\theta)\\) . The potential function is the logarithm of the right-hand side of this equation: \\(f(\\theta) = \\log(p(x_o | \\theta)p(\\theta))\\) potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) By calling the potential_fn , you can evaluate the potential: # Assuming that your parameters are 1D. potential = potential_fn ( torch . zeros ( 1 , num_dim ) ) # -> returns f(0) = log( p(x_o|0) p(0) ) The other object that is returned by likelihood_estimator_based_potential is a parameter_transform . The parameter_transform is a pytorch transform . The parameter_transform is a fixed transform that is can be applied to parameter theta . It transforms the parameters into unconstrained space (if the prior is bounded, e.g. BoxUniform ), and standardizes the parameters (i.e. zero mean, one std). 
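As a sketch of how much of the sampler can be customized through this interface, the potential function and transform can be handed to an MCMC sampler with non-default settings; the keyword arguments method , num_chains , warmup_steps and thin are taken to be MCMCPosterior options, so verify them against the MCMCPosterior documentation of your installed sbi version:

```python
from sbi.inference import MCMCPosterior

# Sketch: an MCMC sampler on top of potential_fn with custom settings.
posterior = MCMCPosterior(
    potential_fn,
    proposal=prior,
    theta_transform=parameter_transform,
    method="slice_np_vectorized",  # vectorized slice sampling
    num_chains=20,
    warmup_steps=100,
    thin=5,
)
samples = posterior.sample((1000,), x=x_o)  # x_o as used to build potential_fn
```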
Using parameter_transform during sampling is optional, but it usually improves the performance of MCMC. theta_tf = parameter_transform ( torch . zeros ( 1 , num_dim )) theta_original = parameter_transform . inv ( theta_tf ) print ( theta_original ) # -> tensor([[0.0]]) tensor([[0., 0.]]) After having obtained the potential_fn , we can sample from the posterior with MCMC or rejection sampling: from sbi.inference import MCMCPosterior , RejectionPosterior posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) posterior = RejectionPosterior ( potential_fn , proposal = prior ) Main syntax for SNPE \u00b6 SNPE usually does not require MCMC or rejection sampling (if you still need it, you can use the same syntax as above with the posterior_estimator_based_potential function). Instead, SNPE samples from the neural network. If the support of the prior is bounded, some samples can lie outside of the support of the prior. The DirectPosterior class automatically rejects these samples: from sbi.inference import SNPE from sbi.inference import DirectPosterior inference = SNPE () posterior_estimator = inference . append_simulations ( theta , x ) . train () posterior = DirectPosterior ( posterior_estimator , prior = prior ) Neural network successfully converged after 57 epochs.","title":"Sampler interface"},{"location":"tutorial/11_sampler_interface/#the-sampler-interface","text":"Note: this tutorial requires that the user is already familiar with the flexible interface . sbi implements three methods: SNPE, SNLE, and SNRE. When using SNPE, the trained neural network directly approximates the posterior. Thus, sampling from the posterior can be done by sampling from the trained neural network. The neural networks trained in SNLE and SNRE approximate the likelihood(-ratio). Thus, in order to draw samples from the posterior, one has to perform additional sampling steps, e.g. Markov-chain Monte-Carlo (MCMC). In sbi , the implemented samplers are: Markov-chain Monte-Carlo (MCMC) Rejection sampling Variational inference (VI) When using the flexible interface, the sampler as well as its attributes can be set with sample_with=\"mcmc\" , mcmc_method=\"slice_np\" , and mcmc_parameters={} . However, for full flexibility in customizing the sampler, we recommend using the sampler interface . This interface is described here. Further details can be found here .","title":"The sampler interface"},{"location":"tutorial/11_sampler_interface/#main-syntax-for-snle","text":"import torch from sbi.inference import SNLE from sbi.inference import likelihood_estimator_based_potential , MCMCPosterior # dummy Gaussian simulator for demonstration num_dim = 2 prior = torch . distributions . MultivariateNormal ( torch . zeros ( num_dim ), torch . eye ( num_dim )) theta = prior . sample (( 1000 ,)) x = theta + torch . randn (( 1000 , num_dim )) x_o = torch . randn (( 1 , num_dim )) inference = SNLE ( show_progress_bars = False ) likelihood_estimator = inference . append_simulations ( theta , x ) . train () potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) Neural network successfully converged after 52 epochs.","title":"Main syntax for SNLE"},{"location":"tutorial/11_sampler_interface/#further-explanation","text":"The first lines are the same as for the flexible interface: inference = SNLE () likelihood_estimator = inference . 
append_simulations ( theta , x ) . train () Neural network successfully converged after 33 epochs. Next, we obtain the potential function. A potential function is a function \\(f(\\theta)\\) of the parameters. The posterior is proportional to the product of likelihood and prior: \\(p(\\theta | x_o) \\propto p(x_o | \\theta)p(\\theta)\\) . The potential function is the logarithm of the right-hand side of this equation: \\(f(\\theta) = \\log(p(x_o | \\theta)p(\\theta))\\) potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) By calling the potential_fn , you can evaluate the potential: # Assuming that your parameters are 1D. potential = potential_fn ( torch . zeros ( 1 , num_dim ) ) # -> returns f(0) = log( p(x_o|0) p(0) ) The other object that is returned by likelihood_estimator_based_potential is a parameter_transform . The parameter_transform is a pytorch transform , i.e., a fixed transform that can be applied to the parameters theta . It transforms the parameters into unconstrained space (if the prior is bounded, e.g. BoxUniform ), and standardizes the parameters (i.e. zero mean, one std). Using parameter_transform during sampling is optional, but it usually improves the performance of MCMC. theta_tf = parameter_transform ( torch . zeros ( 1 , num_dim )) theta_original = parameter_transform . inv ( theta_tf ) print ( theta_original ) # -> tensor([[0.0]]) tensor([[0., 0.]]) After having obtained the potential_fn , we can sample from the posterior with MCMC or rejection sampling: from sbi.inference import MCMCPosterior , RejectionPosterior posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) posterior = RejectionPosterior ( potential_fn , proposal = prior )","title":"Further explanation"},{"location":"tutorial/11_sampler_interface/#main-syntax-for-snpe","text":"SNPE usually does not require MCMC or rejection sampling (if you still need it, you can use the same syntax as above with the posterior_estimator_based_potential function). Instead, SNPE samples from the neural network. If the support of the prior is bounded, some samples can lie outside of the support of the prior. The DirectPosterior class automatically rejects these samples: from sbi.inference import SNPE from sbi.inference import DirectPosterior inference = SNPE () posterior_estimator = inference . append_simulations ( theta , x ) . train () posterior = DirectPosterior ( posterior_estimator , prior = prior ) Neural network successfully converged after 57 epochs.","title":"Main syntax for SNPE"},{"location":"tutorial/12_diagnostics_posterior_predictive_check/","text":"Posterior Predictive Checks (PPC) in SBI \u00b6 A common safety check performed as part of inference is the Posterior Predictive Check (PPC) . A PPC compares data \\(x_{\\text{pp}}\\) generated using the parameters \\(\\theta_{\\text{posterior}}\\) sampled from the posterior with the observed data \\(x_o\\) . The general concept is that, if the inference is correct, the generated data \\(x_{\\text{pp}}\\) should \u201clook similar\u201d to the observed data \\(x_o\\) . Said differently, \\(x_o\\) should be within the support of \\(x_{\\text{pp}}\\) . A PPC usually shouldn\u2019t be used as a validation metric . Nonetheless, a PPC is a good starting point for diagnosing inference and can provide an intuition about any bias introduced by the inference: does \\(x_{\\text{pp}}\\) systematically differ from \\(x_o\\) ?
Main syntax \u00b6 from sbi.analysis import pairplot # A PPC is performed after we trained or neural posterior posterior . set_default_x ( x_o ) # We draw theta samples from the posterior. This part is not in the scope of SBI posterior_samples = posterior . sample (( 5_000 ,)) # We use posterior theta samples to generate x data x_pp = simulator ( posterior_samples ) # We verify if the observed data falls within the support of the generated data _ = pairplot ( samples = x_pp , points = x_o ) Performing a PPC over a toy example \u00b6 Below we provide an example Posterior Predictive Check (PPC) over some toy example: from sbi.analysis import pairplot import torch _ = torch . manual_seed ( 0 ) We work on an inference problem over three parameters using any of the techniques implemented in sbi . In this tutorial, we load the dummy posterior: from toy_posterior_for_07_cc import ExamplePosterior posterior = ExamplePosterior () Let us say that we are observing the data point \\(x_o\\) : D = 5 # simulator output was 5-dimensional x_o = torch . ones ( 1 , D ) posterior . set_default_x ( x_o ) The posterior can be used to draw \\(\\theta_{\\text{posterior}}\\) samples: posterior_samples = posterior . sample (( 5_000 ,)) fig , ax = pairplot ( samples = posterior_samples , limits = torch . tensor ([[ - 2.5 , 2.5 ]] * 3 ), offdiag = [ \"kde\" ], diag = [ \"kde\" ], figsize = ( 5 , 5 ), labels = [ rf \"$\\theta_ { d } $\" for d in range ( 3 )], ) Now we can use our simulator to generate some data \\(x_{\\text{PP}}\\) , using as input parameters the poterior samples \\(\\theta_{\\text{posterior}}\\) . Note that the simulation part is not in the sbi scope, so any simulator -including a non-Python one- can be used at this stage. In our case we\u2019ll use a dummy simulator: def dummy_simulator ( posterior_samples : torch . Tensor , * args , ** kwargs ) -> torch . Tensor : sample_size = posterior_samples . shape [ 0 ] scale = 1.0 shift = torch . distributions . Gumbel ( loc = torch . zeros ( D ), scale = scale / 2 ) . sample () return torch . distributions . Gumbel ( loc = x_o [ 0 ] + shift , scale = scale ) . sample ( ( sample_size ,) ) x_pp = dummy_simulator ( posterior_samples ) Plotting \\(x_o\\) against the \\(x_{\\text{pp}}\\) , we perform a PPC that plays the role of a sanity check. In this case, the check indicates that \\(x_o\\) falls right within the support of \\(x_{\\text{pp}}\\) , which should make the experimenter rather confident about the estimated posterior : _ = pairplot ( samples = x_pp , points = x_o [ 0 ], limits = torch . tensor ([[ - 2.0 , 5.0 ]] * 5 ), points_colors = \"red\" , figsize = ( 8 , 8 ), offdiag = \"scatter\" , scatter_offdiag = dict ( marker = \".\" , s = 5 ), points_offdiag = dict ( marker = \"+\" , markersize = 20 ), labels = [ rf \"$x_ { d } $\" for d in range ( D )], ) In contrast, \\(x_o\\) falling well outside the support of \\(x_{\\text{pp}}\\) is indicative of a failure to estimate the correct posterior. Here we simulate such a failure mode: error_shift = - 2.0 * torch . ones ( 1 , 5 ) _ = pairplot ( samples = x_pp , points = x_o [ 0 ] + error_shift , limits = torch . 
tensor ([[ - 2.0 , 5.0 ]] * 5 ), points_colors = \"red\" , figsize = ( 8 , 8 ), offdiag = \"scatter\" , scatter_offdiag = dict ( marker = \".\" , s = 5 ), points_offdiag = dict ( marker = \"+\" , markersize = 20 ), labels = [ rf \"$x_ { d } $\" for d in range ( D )], ) A typical way to investigate this issue would be to run a prior* predictive check , applying the same plotting strategy, but drawing \\(\\theta\\) from the prior instead of the posterior. **The support for \\(x_{\\text{pp}}\\) should be larger and should contain \\(x_o\\) * . If this check is successful, the \u201cblame\u201d can then be shifted to the inference (method used, convergence of density estimators, number of sequential rounds, etc\u2026).","title":"Posterior predictive checks"},{"location":"tutorial/12_diagnostics_posterior_predictive_check/#posterior-predictive-checks-ppc-in-sbi","text":"A common safety check performed as part of inference are Posterior Predictive Checks (PPC) . A PPC compares data \\(x_{\\text{pp}}\\) generated using the parameters \\(\\theta_{\\text{posterior}}\\) sampled from the posterior with the observed data \\(x_o\\) . The general concept is that -if the inference is correct- the generated data \\(x_{\\text{pp}}\\) should \u201clook similar\u201d the oberved data \\(x_0\\) . Said differently, \\(x_o\\) should be within the support of \\(x_{\\text{pp}}\\) . A PPC usually shouldn\u2019t be used as a validation metric . Nonetheless a PPC is a good start for an inference diagnosis and can provide with an intuition about any bias introduced in inference: does \\(x_{\\text{pp}}\\) systematically differ from \\(x_o\\) ?","title":"Posterior Predictive Checks (PPC) in SBI"},{"location":"tutorial/12_diagnostics_posterior_predictive_check/#main-syntax","text":"from sbi.analysis import pairplot # A PPC is performed after we trained or neural posterior posterior . set_default_x ( x_o ) # We draw theta samples from the posterior. This part is not in the scope of SBI posterior_samples = posterior . sample (( 5_000 ,)) # We use posterior theta samples to generate x data x_pp = simulator ( posterior_samples ) # We verify if the observed data falls within the support of the generated data _ = pairplot ( samples = x_pp , points = x_o )","title":"Main syntax"},{"location":"tutorial/12_diagnostics_posterior_predictive_check/#performing-a-ppc-over-a-toy-example","text":"Below we provide an example Posterior Predictive Check (PPC) over some toy example: from sbi.analysis import pairplot import torch _ = torch . manual_seed ( 0 ) We work on an inference problem over three parameters using any of the techniques implemented in sbi . In this tutorial, we load the dummy posterior: from toy_posterior_for_07_cc import ExamplePosterior posterior = ExamplePosterior () Let us say that we are observing the data point \\(x_o\\) : D = 5 # simulator output was 5-dimensional x_o = torch . ones ( 1 , D ) posterior . set_default_x ( x_o ) The posterior can be used to draw \\(\\theta_{\\text{posterior}}\\) samples: posterior_samples = posterior . sample (( 5_000 ,)) fig , ax = pairplot ( samples = posterior_samples , limits = torch . tensor ([[ - 2.5 , 2.5 ]] * 3 ), offdiag = [ \"kde\" ], diag = [ \"kde\" ], figsize = ( 5 , 5 ), labels = [ rf \"$\\theta_ { d } $\" for d in range ( 3 )], ) Now we can use our simulator to generate some data \\(x_{\\text{PP}}\\) , using as input parameters the poterior samples \\(\\theta_{\\text{posterior}}\\) . 
Note that the simulation part is not in the sbi scope, so any simulator -including a non-Python one- can be used at this stage. In our case we\u2019ll use a dummy simulator: def dummy_simulator ( posterior_samples : torch . Tensor , * args , ** kwargs ) -> torch . Tensor : sample_size = posterior_samples . shape [ 0 ] scale = 1.0 shift = torch . distributions . Gumbel ( loc = torch . zeros ( D ), scale = scale / 2 ) . sample () return torch . distributions . Gumbel ( loc = x_o [ 0 ] + shift , scale = scale ) . sample ( ( sample_size ,) ) x_pp = dummy_simulator ( posterior_samples ) Plotting \\(x_o\\) against the \\(x_{\\text{pp}}\\) , we perform a PPC that plays the role of a sanity check. In this case, the check indicates that \\(x_o\\) falls right within the support of \\(x_{\\text{pp}}\\) , which should make the experimenter rather confident about the estimated posterior : _ = pairplot ( samples = x_pp , points = x_o [ 0 ], limits = torch . tensor ([[ - 2.0 , 5.0 ]] * 5 ), points_colors = \"red\" , figsize = ( 8 , 8 ), offdiag = \"scatter\" , scatter_offdiag = dict ( marker = \".\" , s = 5 ), points_offdiag = dict ( marker = \"+\" , markersize = 20 ), labels = [ rf \"$x_ { d } $\" for d in range ( D )], ) In contrast, \\(x_o\\) falling well outside the support of \\(x_{\\text{pp}}\\) is indicative of a failure to estimate the correct posterior. Here we simulate such a failure mode: error_shift = - 2.0 * torch . ones ( 1 , 5 ) _ = pairplot ( samples = x_pp , points = x_o [ 0 ] + error_shift , limits = torch . tensor ([[ - 2.0 , 5.0 ]] * 5 ), points_colors = \"red\" , figsize = ( 8 , 8 ), offdiag = \"scatter\" , scatter_offdiag = dict ( marker = \".\" , s = 5 ), points_offdiag = dict ( marker = \"+\" , markersize = 20 ), labels = [ rf \"$x_ { d } $\" for d in range ( D )], ) A typical way to investigate this issue would be to run a prior* predictive check , applying the same plotting strategy, but drawing \\(\\theta\\) from the prior instead of the posterior. **The support for \\(x_{\\text{pp}}\\) should be larger and should contain \\(x_o\\) * . If this check is successful, the \u201cblame\u201d can then be shifted to the inference (method used, convergence of density estimators, number of sequential rounds, etc\u2026).","title":"Performing a PPC over a toy example"},{"location":"tutorial/13_diagnostics_simulation_based_calibration/","text":"Simulation-based Calibration in SBI \u00b6 After a density estimator has been trained with simulated data to obtain a posterior, the estimator should be made subject to several diagnostic tests, before being used for inference given the actual observed data. Posterior Predictive Checks (see tutorial 12) provide one way to \u201ccritique\u201d a trained estimator via its predictive performance. Another important approach to such diagnostics is simulation-based calibration as reported by Talts et al, 2018 . Simulation-based calibration (SBC) provides a (qualitative) view and a quantitive measure to check, whether the uncertainties of the posterior are balanced, i.e., neither over-confident nor under-confident. As such, SBC can be viewed as a necessary condition (but not sufficient) for a valid inference algorithm: If SBC checks fail, this tells you that your inference is invalid. If SBC checks pass, this is no guarantee that the posterior estimation is working. 
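The workflow summarized in the next section boils down to a few calls to sbi's helpers. Here is a compact sketch; it assumes a prior, a simulator, and an already trained posterior as in this tutorial, and the variable names (e.g. num_sbc_runs) are illustrative.
from sbi.analysis import run_sbc, sbc_rank_plot

num_sbc_runs = 200                # number of (theta_o, x_o) pairs used for SBC
num_posterior_samples = 1_000

thetas = prior.sample((num_sbc_runs,))   # ground-truth parameters drawn from the prior
xs = simulator(thetas)                   # corresponding observations simulated from them

# For each x, draw posterior samples and rank the ground-truth theta among them.
ranks, dap_samples = run_sbc(thetas, xs, posterior, num_posterior_samples=num_posterior_samples)

# Uniformly distributed ranks indicate well-calibrated posterior uncertainties.
_ = sbc_rank_plot(
    ranks=ranks,
    num_posterior_samples=num_posterior_samples,
    plot_type=\"hist\",
    num_bins=None,
)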
In a nutshell \u00b6 To run SBC, we sample theta_o_i values from the prior of the problem at hand we simulate \u201cobservations\u201d from these parameters: x_o_i = simulator(theta_o_i) we perform inference given each observation x_o_i . This produces a separate posterior \\(p_i(\\theta | x_{o,i})\\) for each of x_o_i . The key step for SBC is to generate a set of posterior samples \\(\\{\\theta\\}_i\\) from each posterior (let\u2019s call this theta_i_s , referring to s samples from posterior \\(p_i(\\theta | x_{o,i})\\) ), and to rank the corresponding theta_o_i under this set of samples. A rank is computed by counting how many samples theta_i_s fall below their corresponding theta_o_i (see section 4.1 in Talts et al.). These ranks are then used to perform the SBC check. Key ideas behind SBC \u00b6 The core idea behind SBC is two fold: SBC ranks of ground truth parameters under the inferred posterior samples follow a uniform distribution. (If the SBC ranks are not uniformly distributed, the posterior is not well calibrated.) samples from the data averaged posterior (ensemble of randomly chosen posterior samples given multiple distinct observations x_o ) are distributed according to the prior What can SBC diagnose? \u00b6 SBC can inform us whether we are not wrong. However, it cannot tell us whether we are right, i.e., SBC checks a necessary condition. For example, imagine you run SBC using the prior as a posterior. The ranks would be perfectly uniform. But the inference would be wrong. The Posterior Predictive Checks (see tutorial 12) can be seen as the complementary sufficient check for the posterior (only as a methaphor, no theoretical guarantees here). Using the prior as a posterior and then doing predictive checks would clearly show that inference failed. To summarize SBC can: tell us whether the SBI method applied to the problem at hand produces posteriors that have well-calibrated uncertainties, and if not, what kind of systematic bias it has: negative or positive bias (shift in the mean of the predictions) or over- or underdispersion (too large or too small variance) A healthy posterior \u00b6 Let\u2019s take the gaussian linear simulator from the previous tutorials and run inference with NPE on it. Note: SBC requires running inference several times. Using SBC with amortized methods like NPE is hence a justified endavour: repeated inference is cheap and SBC can be performed with little runtime penalty. This does not hold for sequential methods or anything relying on MCMC or VI (here, parallelization is your friend, num_workers>1 ). import torch _ = torch . manual_seed ( 10 ) from torch import eye , ones , zeros from torch.distributions import MultivariateNormal from sbi.analysis import check_sbc , run_sbc , get_nltp , sbc_rank_plot from sbi.inference import SNPE , SNPE_C , prepare_for_sbi , simulate_for_sbi from sbi.simulators import linear_gaussian , diagonal_linear_gaussian num_dim = 2 num_simulations = 5_000 prior_mean = ones ( num_dim ) prior_cov = 2 * eye ( num_dim ) prior = MultivariateNormal ( loc = prior_mean , covariance_matrix = prior_cov , validate_args = False ) An ideal case \u00b6 To explore SBC, we make our life easy and assume that we deal with a problem where the likelihood is modelled by an identity mapping and a bit of smear. But to start, we only use an almost vanishing smear of 0.01 . 
default_likelihood_loc = 0.0 # let's start with 0 shift default_likelihood_scale = 0.01 # let's smear theta only by a little bit def simulator ( theta , loc = default_likelihood_loc , scale = default_likelihood_scale ): \"\"\"linear gaussian inspired by sbibm https://github.com/sbi-benchmark/sbibm/blob/15f068a08a938383116ffd92b92de50c580810a3/sbibm/tasks/gaussian_linear/task.py#L74 \"\"\" num_dim = theta . shape [ - 1 ] cov_ = scale * eye ( num_dim ) # always positively semi-definite # using validate_args=False disables sanity checks on `covariance_matrix` # for the sake of speed value = MultivariateNormal ( loc = ( theta + loc ), covariance_matrix = cov_ , validate_args = False ) . sample () return value theta , x = simulate_for_sbi ( simulator , prior , num_simulations ) Running 5000 simulations.: 0%| | 0/5000 [00:001 ). import torch _ = torch . manual_seed ( 10 ) from torch import eye , ones , zeros from torch.distributions import MultivariateNormal from sbi.analysis import check_sbc , run_sbc , get_nltp , sbc_rank_plot from sbi.inference import SNPE , SNPE_C , prepare_for_sbi , simulate_for_sbi from sbi.simulators import linear_gaussian , diagonal_linear_gaussian num_dim = 2 num_simulations = 5_000 prior_mean = ones ( num_dim ) prior_cov = 2 * eye ( num_dim ) prior = MultivariateNormal ( loc = prior_mean , covariance_matrix = prior_cov , validate_args = False )","title":"A healthy posterior"},{"location":"tutorial/13_diagnostics_simulation_based_calibration/#an-ideal-case","text":"To explore SBC, we make our life easy and assume that we deal with a problem where the likelihood is modelled by an identity mapping and a bit of smear. But to start, we only use an almost vanishing smear of 0.01 . default_likelihood_loc = 0.0 # let's start with 0 shift default_likelihood_scale = 0.01 # let's smear theta only by a little bit def simulator ( theta , loc = default_likelihood_loc , scale = default_likelihood_scale ): \"\"\"linear gaussian inspired by sbibm https://github.com/sbi-benchmark/sbibm/blob/15f068a08a938383116ffd92b92de50c580810a3/sbibm/tasks/gaussian_linear/task.py#L74 \"\"\" num_dim = theta . shape [ - 1 ] cov_ = scale * eye ( num_dim ) # always positively semi-definite # using validate_args=False disables sanity checks on `covariance_matrix` # for the sake of speed value = MultivariateNormal ( loc = ( theta + loc ), covariance_matrix = cov_ , validate_args = False ) . sample () return value theta , x = simulate_for_sbi ( simulator , prior , num_simulations ) Running 5000 simulations.: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter. IID inference with NLE \u00b6 (S)NLE can easily perform inference given multiple IID x because it is based on learning the likelihood. Once the likelihood is learned on single trials, i.e., a neural network that given a single observation and a parameter predicts the likelihood of that observation given the parameter, one can perform MCMC to obtain posterior samples. MCMC relies on evaluating ratios of likelihoods of candidate parameters to either accept or reject them to be posterior samples. When inferring the posterior given multiple IID observation, these likelihoods are just the joint likelihoods of each IID observation given the current parameter candidate. 
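To make the multiply-likelihoods / add-log-likelihoods bookkeeping explicit, here is a small pure-PyTorch illustration with a tractable Gaussian likelihood (a sketch of the principle only, not how the neural likelihood is implemented internally; all values are illustrative).
import torch
from torch.distributions import Normal

theta_candidate = torch.tensor([1.0, 0.5])            # current MCMC parameter candidate
x_iid = Normal(theta_candidate, 0.2).sample((10,))    # 10 IID trials, shape (10, 2)

# Per-trial log-likelihoods log p(x_i | theta), one value per trial.
per_trial_log_liks = Normal(theta_candidate, 0.2).log_prob(x_iid).sum(dim=-1)

# The joint log-likelihood of all IID trials is simply their sum.
joint_log_lik = per_trial_log_liks.sum()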
Thus, given a neural likelihood from SNLE, we can calculate these joint likelihoods and perform MCMC given IID data, we just have to multiply together (or add in log-space) the individual trial-likelihoods ( sbi takes care of that). # Train SNLE. inferer = SNLE ( prior , show_progress_bars = True , density_estimator = \"mdn\" ) theta , x = simulate_for_sbi ( simulator , prior , 10000 , simulation_batch_size = 1000 ) inferer . append_simulations ( theta , x ) . train ( training_batch_size = 1000 ); Running 10000 simulations.: 0%| | 0/10000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); The pairplot above already indicates that (S)NLE is well able to obtain accurate posterior samples also for increasing number of trials (note that we trained the single-round version of SNLE so that we did not have to re-train it for new \\(x_o\\) ). Quantitatively we can measure the accuracy of SNLE by calculating the c2st score between SNLE and the true posterior samples, where the best accuracy is perfect for 0.5 : cs = [ c2st ( torch . from_numpy ( s1 ), torch . from_numpy ( s2 )) for s1 , s2 in zip ( true_samples , nle_samples ) ] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) c2st score for num_trials=1: 0.50 c2st score for num_trials=5: 0.51 c2st score for num_trials=15: 0.51 c2st score for num_trials=20: 0.51 IID inference with NPE using permutation-invariant embedding nets \u00b6 For NPE we need to define an embedding net that handles the set-like structure of iid-data, i.e., that it permutation invariant and can handle different number of trials. We implemented several embedding net classes that allow to construct such a permutation- and number-of-trials invariant embedding net. To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021). To become invariant w.r.t. the number-of-trials, we train the net with varying number of trials for each parameter setting. As it is difficult to handle tensors of varying lengths in the SBI training loop, we construct a training data set in which \u201cunobserved\u201d trials are mask by NaNs (and ignore the resulting SBI warning about NaNs in the training data). Construct training data set. \u00b6 # we need to fix the maximum number of trials. max_num_trials = 20 # construct training data set: we want to cover the full range of possible number of # trials num_training_samples = 5000 theta = prior . sample (( num_training_samples ,)) # there are certainly smarter ways to construct the training data set, but we go with a # for loop here for illustration purposes. x = torch . ones ( num_training_samples * max_num_trials , max_num_trials , x_dim ) * float ( \"nan\" ) for i in range ( num_training_samples ): xi = simulator ( theta [ i ] . repeat ( max_num_trials , 1 )) for j in range ( max_num_trials ): x [ i * max_num_trials + j , : j + 1 , :] = xi [: j + 1 , :] theta = theta . 
repeat_interleave ( max_num_trials , dim = 0 ) Build embedding net \u00b6 from sbi.neural_nets.embedding_nets import ( FCEmbedding , PermutationInvariantEmbedding , ) from sbi.utils import posterior_nn # embedding latent_dim = 10 single_trial_net = FCEmbedding ( input_dim = theta_dim , num_hiddens = 40 , num_layers = 2 , output_dim = latent_dim , ) embedding_net = PermutationInvariantEmbedding ( single_trial_net , trial_net_output_dim = latent_dim , # NOTE: post-embedding is not needed really. num_layers = 1 , num_hiddens = 10 , output_dim = 10 , ) # we choose a simple MDN as the density estimator. # NOTE: we turn off z-scoring of the data, as we used NaNs for the missing trials. density_estimator = posterior_nn ( \"mdn\" , embedding_net = embedding_net , z_score_x = \"none\" ) Run training \u00b6 inference = SNPE ( prior , density_estimator = density_estimator ) # NOTE: we don't exclude invalid x because we used NaNs for the missing trials. inference . append_simulations ( theta , x , exclude_invalid_x = False , ) . train ( training_batch_size = 1000 ) posterior = inference . build_posterior () WARNING:root:Found 95000 NaN simulations and 0 Inf simulations. They are not excluded from training due to `exclude_invalid_x=False`.Training will likely fail, we strongly recommend `exclude_invalid_x=True` for Single-round NPE. Neural network successfully converged after 168 epochs. Amortized inference \u00b6 Comparing runtimes, we see that the NPE training takes a bit longer than the training on single trials for NLE above. However, we trained the density estimator such that it can handle multiple and changing number of iid trials (up to 20). Thus, we can obtain posterior samples for different x_o with just a single forward pass instead of having to run MCMC for each new observation. As you can see below, the c2st score for increasing number of observed trials remains close to the ideal 0.5 . npe_samples = [] for xo in xos : # we need to pad the x_os with NaNs to match the shape of the training data. xoi = torch . ones ( 1 , max_num_trials , x_dim ) * float ( \"nan\" ) xoi [ 0 , : len ( xo ), :] = xo npe_samples . append ( posterior . sample ( sample_shape = ( num_samples ,), x = xoi )) cs = [ c2st ( torch . from_numpy ( s1 ), s2 ) for s1 , s2 in zip ( true_samples , npe_samples )] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter.","title":"The analytical posterior concentrates around true parameters with increasing number of IID trials"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#iid-inference-with-nle","text":"(S)NLE can easily perform inference given multiple IID x because it is based on learning the likelihood. 
Once the likelihood is learned on single trials, i.e., a neural network that given a single observation and a parameter predicts the likelihood of that observation given the parameter, one can perform MCMC to obtain posterior samples. MCMC relies on evaluating ratios of likelihoods of candidate parameters to either accept or reject them to be posterior samples. When inferring the posterior given multiple IID observation, these likelihoods are just the joint likelihoods of each IID observation given the current parameter candidate. Thus, given a neural likelihood from SNLE, we can calculate these joint likelihoods and perform MCMC given IID data, we just have to multiply together (or add in log-space) the individual trial-likelihoods ( sbi takes care of that). # Train SNLE. inferer = SNLE ( prior , show_progress_bars = True , density_estimator = \"mdn\" ) theta , x = simulate_for_sbi ( simulator , prior , 10000 , simulation_batch_size = 1000 ) inferer . append_simulations ( theta , x ) . train ( training_batch_size = 1000 ); Running 10000 simulations.: 0%| | 0/10000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); The pairplot above already indicates that (S)NLE is well able to obtain accurate posterior samples also for increasing number of trials (note that we trained the single-round version of SNLE so that we did not have to re-train it for new \\(x_o\\) ). Quantitatively we can measure the accuracy of SNLE by calculating the c2st score between SNLE and the true posterior samples, where the best accuracy is perfect for 0.5 : cs = [ c2st ( torch . from_numpy ( s1 ), torch . from_numpy ( s2 )) for s1 , s2 in zip ( true_samples , nle_samples ) ] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) c2st score for num_trials=1: 0.50 c2st score for num_trials=5: 0.51 c2st score for num_trials=15: 0.51 c2st score for num_trials=20: 0.51","title":"IID inference with NLE"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#iid-inference-with-npe-using-permutation-invariant-embedding-nets","text":"For NPE we need to define an embedding net that handles the set-like structure of iid-data, i.e., that it permutation invariant and can handle different number of trials. We implemented several embedding net classes that allow to construct such a permutation- and number-of-trials invariant embedding net. To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021). To become invariant w.r.t. the number-of-trials, we train the net with varying number of trials for each parameter setting. As it is difficult to handle tensors of varying lengths in the SBI training loop, we construct a training data set in which \u201cunobserved\u201d trials are mask by NaNs (and ignore the resulting SBI warning about NaNs in the training data).","title":"IID inference with NPE using permutation-invariant embedding nets"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#construct-training-data-set","text":"# we need to fix the maximum number of trials. max_num_trials = 20 # construct training data set: we want to cover the full range of possible number of # trials num_training_samples = 5000 theta = prior . 
sample (( num_training_samples ,)) # there are certainly smarter ways to construct the training data set, but we go with a # for loop here for illustration purposes. x = torch . ones ( num_training_samples * max_num_trials , max_num_trials , x_dim ) * float ( \"nan\" ) for i in range ( num_training_samples ): xi = simulator ( theta [ i ] . repeat ( max_num_trials , 1 )) for j in range ( max_num_trials ): x [ i * max_num_trials + j , : j + 1 , :] = xi [: j + 1 , :] theta = theta . repeat_interleave ( max_num_trials , dim = 0 )","title":"Construct training data set."},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#build-embedding-net","text":"from sbi.neural_nets.embedding_nets import ( FCEmbedding , PermutationInvariantEmbedding , ) from sbi.utils import posterior_nn # embedding latent_dim = 10 single_trial_net = FCEmbedding ( input_dim = theta_dim , num_hiddens = 40 , num_layers = 2 , output_dim = latent_dim , ) embedding_net = PermutationInvariantEmbedding ( single_trial_net , trial_net_output_dim = latent_dim , # NOTE: post-embedding is not needed really. num_layers = 1 , num_hiddens = 10 , output_dim = 10 , ) # we choose a simple MDN as the density estimator. # NOTE: we turn off z-scoring of the data, as we used NaNs for the missing trials. density_estimator = posterior_nn ( \"mdn\" , embedding_net = embedding_net , z_score_x = \"none\" )","title":"Build embedding net"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#run-training","text":"inference = SNPE ( prior , density_estimator = density_estimator ) # NOTE: we don't exclude invalid x because we used NaNs for the missing trials. inference . append_simulations ( theta , x , exclude_invalid_x = False , ) . train ( training_batch_size = 1000 ) posterior = inference . build_posterior () WARNING:root:Found 95000 NaN simulations and 0 Inf simulations. They are not excluded from training due to `exclude_invalid_x=False`.Training will likely fail, we strongly recommend `exclude_invalid_x=True` for Single-round NPE. Neural network successfully converged after 168 epochs.","title":"Run training"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#amortized-inference","text":"Comparing runtimes, we see that the NPE training takes a bit longer than the training on single trials for NLE above. However, we trained the density estimator such that it can handle multiple and changing number of iid trials (up to 20). Thus, we can obtain posterior samples for different x_o with just a single forward pass instead of having to run MCMC for each new observation. As you can see below, the c2st score for increasing number of observed trials remains close to the ideal 0.5 . npe_samples = [] for xo in xos : # we need to pad the x_os with NaNs to match the shape of the training data. xoi = torch . ones ( 1 , max_num_trials , x_dim ) * float ( \"nan\" ) xoi [ 0 , : len ( xo ), :] = xo npe_samples . append ( posterior . sample ( sample_shape = ( num_samples ,), x = xoi )) cs = [ c2st ( torch . from_numpy ( s1 ), s2 ) for s1 , s2 in zip ( true_samples , npe_samples )] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . 
item () : .2f } \" ) Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter. IID inference with NLE \u00b6 (S)NLE can easily perform inference given multiple IID x because it is based on learning the likelihood. Once the likelihood is learned on single trials, i.e., a neural network that given a single observation and a parameter predicts the likelihood of that observation given the parameter, one can perform MCMC to obtain posterior samples. MCMC relies on evaluating ratios of likelihoods of candidate parameters to either accept or reject them to be posterior samples. When inferring the posterior given multiple IID observation, these likelihoods are just the joint likelihoods of each IID observation given the current parameter candidate. Thus, given a neural likelihood from SNLE, we can calculate these joint likelihoods and perform MCMC given IID data, we just have to multiply together (or add in log-space) the individual trial-likelihoods ( sbi takes care of that). # Train SNLE. inferer = SNLE ( prior , show_progress_bars = True , density_estimator = \"mdn\" ) theta , x = simulate_for_sbi ( simulator , prior , 10000 , simulation_batch_size = 1000 ) inferer . append_simulations ( theta , x ) . train ( training_batch_size = 1000 ); Running 10000 simulations.: 0%| | 0/10000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); The pairplot above already indicates that (S)NLE is well able to obtain accurate posterior samples also for increasing number of trials (note that we trained the single-round version of SNLE so that we did not have to re-train it for new \\(x_o\\) ). Quantitatively we can measure the accuracy of SNLE by calculating the c2st score between SNLE and the true posterior samples, where the best accuracy is perfect for 0.5 : cs = [ c2st ( torch . from_numpy ( s1 ), torch . from_numpy ( s2 )) for s1 , s2 in zip ( true_samples , nle_samples ) ] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) c2st score for num_trials=1: 0.50 c2st score for num_trials=5: 0.51 c2st score for num_trials=15: 0.51 c2st score for num_trials=20: 0.51 IID inference with NPE using permutation-invariant embedding nets \u00b6 For NPE we need to define an embedding net that handles the set-like structure of iid-data, i.e., that it permutation invariant and can handle different number of trials. We implemented several embedding net classes that allow to construct such a permutation- and number-of-trials invariant embedding net. To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021). To become invariant w.r.t. the number-of-trials, we train the net with varying number of trials for each parameter setting. 
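As a minimal illustration of this idea, independent of the PermutationInvariantEmbedding class used below, one can embed each trial with a small network and then average over the trial axis; the class name ToyPermutationInvariantNet and the layer sizes are illustrative assumptions.
import torch
import torch.nn as nn

class ToyPermutationInvariantNet(nn.Module):
    # Embeds each trial separately, then averages over the trial axis.
    def __init__(self, x_dim: int = 2, latent_dim: int = 10):
        super().__init__()
        self.trial_net = nn.Sequential(
            nn.Linear(x_dim, 40), nn.ReLU(), nn.Linear(40, latent_dim)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x has shape (batch, num_trials, x_dim); the mean over dim=1 makes the
        # output invariant to the order of the trials.
        return self.trial_net(x).mean(dim=1)

net = ToyPermutationInvariantNet()
x = torch.randn(5, 20, 2)  # 5 parameter settings, 20 trials each
assert torch.allclose(net(x), net(x[:, torch.randperm(20)]), atol=1e-6)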
As it is difficult to handle tensors of varying lengths in the SBI training loop, we construct a training data set in which \u201cunobserved\u201d trials are mask by NaNs (and ignore the resulting SBI warning about NaNs in the training data). Construct training data set. \u00b6 # we need to fix the maximum number of trials. max_num_trials = 20 # construct training data set: we want to cover the full range of possible number of # trials num_training_samples = 5000 theta = prior . sample (( num_training_samples ,)) # there are certainly smarter ways to construct the training data set, but we go with a # for loop here for illustration purposes. x = torch . ones ( num_training_samples * max_num_trials , max_num_trials , x_dim ) * float ( \"nan\" ) for i in range ( num_training_samples ): xi = simulator ( theta [ i ] . repeat ( max_num_trials , 1 )) for j in range ( max_num_trials ): x [ i * max_num_trials + j , : j + 1 , :] = xi [: j + 1 , :] theta = theta . repeat_interleave ( max_num_trials , dim = 0 ) Build embedding net \u00b6 from sbi.neural_nets.embedding_nets import ( FCEmbedding , PermutationInvariantEmbedding , ) from sbi.utils import posterior_nn # embedding latent_dim = 10 single_trial_net = FCEmbedding ( input_dim = theta_dim , num_hiddens = 40 , num_layers = 2 , output_dim = latent_dim , ) embedding_net = PermutationInvariantEmbedding ( single_trial_net , trial_net_output_dim = latent_dim , # NOTE: post-embedding is not needed really. num_layers = 1 , num_hiddens = 10 , output_dim = 10 , ) # we choose a simple MDN as the density estimator. # NOTE: we turn off z-scoring of the data, as we used NaNs for the missing trials. density_estimator = posterior_nn ( \"mdn\" , embedding_net = embedding_net , z_score_x = \"none\" ) Run training \u00b6 inference = SNPE ( prior , density_estimator = density_estimator ) # NOTE: we don't exclude invalid x because we used NaNs for the missing trials. inference . append_simulations ( theta , x , exclude_invalid_x = False , ) . train ( training_batch_size = 1000 ) posterior = inference . build_posterior () WARNING:root:Found 95000 NaN simulations and 0 Inf simulations. They are not excluded from training due to `exclude_invalid_x=False`.Training will likely fail, we strongly recommend `exclude_invalid_x=True` for Single-round NPE. Neural network successfully converged after 168 epochs. Amortized inference \u00b6 Comparing runtimes, we see that the NPE training takes a bit longer than the training on single trials for NLE above. However, we trained the density estimator such that it can handle multiple and changing number of iid trials (up to 20). Thus, we can obtain posterior samples for different x_o with just a single forward pass instead of having to run MCMC for each new observation. As you can see below, the c2st score for increasing number of observed trials remains close to the ideal 0.5 . npe_samples = [] for xo in xos : # we need to pad the x_os with NaNs to match the shape of the training data. xoi = torch . ones ( 1 , max_num_trials , x_dim ) * float ( \"nan\" ) xoi [ 0 , : len ( xo ), :] = xo npe_samples . append ( posterior . sample ( sample_shape = ( num_samples ,), x = xoi )) cs = [ c2st ( torch . from_numpy ( s1 ), s2 ) for s1 , s2 in zip ( true_samples , npe_samples )] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . 
item () : .2f } \" ) Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter.","title":"The analytical posterior concentrates around true parameters with increasing number of IID trials"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#iid-inference-with-nle","text":"(S)NLE can easily perform inference given multiple IID x because it is based on learning the likelihood. Once the likelihood is learned on single trials, i.e., a neural network that given a single observation and a parameter predicts the likelihood of that observation given the parameter, one can perform MCMC to obtain posterior samples. MCMC relies on evaluating ratios of likelihoods of candidate parameters to either accept or reject them to be posterior samples. When inferring the posterior given multiple IID observation, these likelihoods are just the joint likelihoods of each IID observation given the current parameter candidate. Thus, given a neural likelihood from SNLE, we can calculate these joint likelihoods and perform MCMC given IID data, we just have to multiply together (or add in log-space) the individual trial-likelihoods ( sbi takes care of that). # Train SNLE. inferer = SNLE ( prior , show_progress_bars = True , density_estimator = \"mdn\" ) theta , x = simulate_for_sbi ( simulator , prior , 10000 , simulation_batch_size = 1000 ) inferer . append_simulations ( theta , x ) . train ( training_batch_size = 1000 ); Running 10000 simulations.: 0%| | 0/10000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); The pairplot above already indicates that (S)NLE is well able to obtain accurate posterior samples also for increasing number of trials (note that we trained the single-round version of SNLE so that we did not have to re-train it for new \\(x_o\\) ). Quantitatively we can measure the accuracy of SNLE by calculating the c2st score between SNLE and the true posterior samples, where the best accuracy is perfect for 0.5 : cs = [ c2st ( torch . from_numpy ( s1 ), torch . from_numpy ( s2 )) for s1 , s2 in zip ( true_samples , nle_samples ) ] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) c2st score for num_trials=1: 0.50 c2st score for num_trials=5: 0.51 c2st score for num_trials=15: 0.51 c2st score for num_trials=20: 0.51","title":"IID inference with NLE"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#iid-inference-with-npe-using-permutation-invariant-embedding-nets","text":"For NPE we need to define an embedding net that handles the set-like structure of iid-data, i.e., that it permutation invariant and can handle different number of trials. We implemented several embedding net classes that allow to construct such a permutation- and number-of-trials invariant embedding net. 
To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021). To become invariant w.r.t. the number-of-trials, we train the net with varying number of trials for each parameter setting. As it is difficult to handle tensors of varying lengths in the SBI training loop, we construct a training data set in which \u201cunobserved\u201d trials are mask by NaNs (and ignore the resulting SBI warning about NaNs in the training data).","title":"IID inference with NPE using permutation-invariant embedding nets"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#construct-training-data-set","text":"# we need to fix the maximum number of trials. max_num_trials = 20 # construct training data set: we want to cover the full range of possible number of # trials num_training_samples = 5000 theta = prior . sample (( num_training_samples ,)) # there are certainly smarter ways to construct the training data set, but we go with a # for loop here for illustration purposes. x = torch . ones ( num_training_samples * max_num_trials , max_num_trials , x_dim ) * float ( \"nan\" ) for i in range ( num_training_samples ): xi = simulator ( theta [ i ] . repeat ( max_num_trials , 1 )) for j in range ( max_num_trials ): x [ i * max_num_trials + j , : j + 1 , :] = xi [: j + 1 , :] theta = theta . repeat_interleave ( max_num_trials , dim = 0 )","title":"Construct training data set."},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#build-embedding-net","text":"from sbi.neural_nets.embedding_nets import ( FCEmbedding , PermutationInvariantEmbedding , ) from sbi.utils import posterior_nn # embedding latent_dim = 10 single_trial_net = FCEmbedding ( input_dim = theta_dim , num_hiddens = 40 , num_layers = 2 , output_dim = latent_dim , ) embedding_net = PermutationInvariantEmbedding ( single_trial_net , trial_net_output_dim = latent_dim , # NOTE: post-embedding is not needed really. num_layers = 1 , num_hiddens = 10 , output_dim = 10 , ) # we choose a simple MDN as the density estimator. # NOTE: we turn off z-scoring of the data, as we used NaNs for the missing trials. density_estimator = posterior_nn ( \"mdn\" , embedding_net = embedding_net , z_score_x = \"none\" )","title":"Build embedding net"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#run-training","text":"inference = SNPE ( prior , density_estimator = density_estimator ) # NOTE: we don't exclude invalid x because we used NaNs for the missing trials. inference . append_simulations ( theta , x , exclude_invalid_x = False , ) . train ( training_batch_size = 1000 ) posterior = inference . build_posterior () WARNING:root:Found 95000 NaN simulations and 0 Inf simulations. They are not excluded from training due to `exclude_invalid_x=False`.Training will likely fail, we strongly recommend `exclude_invalid_x=True` for Single-round NPE. Neural network successfully converged after 168 epochs.","title":"Run training"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#amortized-inference","text":"Comparing runtimes, we see that the NPE training takes a bit longer than the training on single trials for NLE above. However, we trained the density estimator such that it can handle multiple and changing number of iid trials (up to 20). 
Thus, we can obtain posterior samples for different x_o with just a single forward pass instead of having to run MCMC for each new observation. As you can see below, the c2st score for increasing number of observed trials remains close to the ideal 0.5 . npe_samples = [] for xo in xos : # we need to pad the x_os with NaNs to match the shape of the training data. xoi = torch . ones ( 1 , max_num_trials , x_dim ) * float ( \"nan\" ) xoi [ 0 , : len ( xo ), :] = xo npe_samples . append ( posterior . sample ( sample_shape = ( num_samples ,), x = xoi )) cs = [ c2st ( torch . from_numpy ( s1 ), s2 ) for s1 , s2 in zip ( true_samples , npe_samples )] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 Dimensions: (chain: 4, draw: 1254, theta_dim_0: 2) Coordinates: * chain (chain) int64 0 1 2 3 * draw (draw) int64 0 1 2 3 4 5 6 ... 1248 1249 1250 1251 1252 1253 * theta_dim_0 (theta_dim_0) int64 0 1 Data variables: theta (chain, draw, theta_dim_0) float32 2.125 0.8092 ... 0.8088 Attributes: created_at: 2022-08-10T14:02:41.300799 arviz_version: 0.11.2 Diagnostic plots \u00b6 az . style . use ( \"arviz-darkgrid\" ) az . plot_rank ( inference_data ) array([, ], dtype=object) az . plot_autocorr ( inference_data ); az . plot_trace ( inference_data , compact = False ); az . plot_ess ( inference_data , kind = \"evolution\" ); Posterior density plots \u00b6 az . plot_posterior ( inference_data ) array([, ], dtype=object) print ( f \"Given the { num_trials } we observed, the posterior is centered around true underlying parameters theta_o: { theta_o } \" ) Given the 100 we observed, the posterior is centered around true underlying parameters theta_o: tensor([[1.9622, 0.7550]]) az . plot_pair ( inference_data ) az . plot_pair ( inference_data , var_names = [ \"theta\" ], kind = \"hexbin\" , marginals = True , figsize = ( 10 , 10 ), ) array([[, None], [, ]], dtype=object)","title":"Density plots and MCMC diagnostics with ArviZ"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#mcmc-diagnostics-with-arviz","text":"This tutorial shows how to evaluate the quality of MCMC samples generated via sbi using the arviz package. Outline: 1) Train MNLE to approximate the likelihood underlying the simulator 2) Run MCMC using pyro MCMC samplers via sbi interface 3) Use arviz to visualize the posterior, predictive distributions and MCMC diagnostics. import arviz as az import torch from sbi.inference import MNLE , likelihood_estimator_based_potential from pyro.distributions import InverseGamma from torch.distributions import Beta , Binomial , Gamma from sbi.utils import MultipleIndependent from sbi.inference import MCMCPosterior # Seeding torch . manual_seed ( 1 ); # Toy simulator for mixed data def mixed_simulator ( theta ): beta , ps = theta [:, : 1 ], theta [:, 1 :] choices = Binomial ( probs = ps ) . sample () rts = InverseGamma ( concentration = 2 * torch . ones_like ( beta ), rate = beta ) . sample () return torch . cat (( rts , choices ), dim = 1 ) # Define independent priors for each dimension. prior = MultipleIndependent ( [ Gamma ( torch . 
tensor ([ 1.0 ]), torch . tensor ([ 0.5 ])), Beta ( torch . tensor ([ 2.0 ]), torch . tensor ([ 2.0 ])), ], validate_args = False , )","title":"MCMC diagnostics with Arviz"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#train-mnle-to-approximate-the-likelihood","text":"For this tutorial, we will use a simple simulator with two parameters. For details see the example on the decision making model . Here, we pass mcmc_method=\"nuts\" in order to use the underlying pyro No-U-turn sampler , but it would work as well with other samplers (e.g. \u201cslice_np_vectorized\u201d, \u201chmc\u201d). Additionally, when calling posterior.sample(...) we pass return_arviz=True so that the Arviz InferenceData object is returned. This object gives us access to the wealth of MCMC diagnostics tool provided by arviz . # Generate training data and train MNLE. num_simulations = 10000 theta = prior . sample (( num_simulations ,)) x = mixed_simulator ( theta ) trainer = MNLE ( prior ) likelihood_estimator = trainer . append_simulations ( theta , x ) . train () /Users/janbolts/qode/sbi/sbi/neural_nets/mnle.py:60: UserWarning: The mixed neural likelihood estimator assumes that x contains continuous data in the first n-1 columns (e.g., reaction times) and categorical data in the last column (e.g., corresponding choices). If this is not the case for the passed `x` do not use this function. warnings.warn( Neural network successfully converged after 65 epochs.","title":"Train MNLE to approximate the likelihood"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#run-pyro-nuts-mcmc-and-obtain-arviz-inferencedata-object","text":"# Simulate \"observed\" data x_o torch . manual_seed ( 42 ) num_trials = 100 theta_o = prior . sample (( 1 ,)) x_o = mixed_simulator ( theta_o . repeat ( num_trials , 1 )) # Set MCMC parameters and run Pyro NUTS. mcmc_parameters = dict ( num_chains = 4 , thin = 5 , warmup_steps = 50 , init_strategy = \"proposal\" , method = \"nuts\" , ) num_samples = 1000 # get the potential function and parameter transform for constructing the posterior potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) mnle_posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform , ** mcmc_parameters ) mnle_samples = mnle_posterior . sample ( ( num_samples ,), x = x_o , show_progress_bars = False ) # get arviz InferenceData object from posterior inference_data = mnle_posterior . get_arviz_inference_data () /Users/janbolts/qode/sbi/sbi/utils/sbiutils.py:280: UserWarning: An x with a batch size of 100 was passed. It will be interpreted as a batch of independent and identically distributed data X={x_1, ..., x_n}, i.e., data generated based on the same underlying (unknown) parameter. The resulting posterior will be with respect to entire batch, i.e,. p(theta | X). warnings.warn(","title":"Run Pyro NUTS MCMC and obtain arviz InferenceData object"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#generate-arviz-plots","text":"The resulting InferenceData object can be passed to most arviz plotting functions, and there are plenty see here for an overview. To get a better understanding of the InferenceData object see here . Below and overview of common MCMC diagnostics plot, see the corresponding arviz documentation for interpretation of the plots. We will a full use-case using the SBI-MCMC-arviz workflow soon. print ( inference_data . 
posterior ) Dimensions: (chain: 4, draw: 1254, theta_dim_0: 2) Coordinates: * chain (chain) int64 0 1 2 3 * draw (draw) int64 0 1 2 3 4 5 6 ... 1248 1249 1250 1251 1252 1253 * theta_dim_0 (theta_dim_0) int64 0 1 Data variables: theta (chain, draw, theta_dim_0) float32 2.125 0.8092 ... 0.8088 Attributes: created_at: 2022-08-10T14:02:41.300799 arviz_version: 0.11.2","title":"Generate arviz plots"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#diagnostic-plots","text":"az . style . use ( \"arviz-darkgrid\" ) az . plot_rank ( inference_data ) array([, ], dtype=object) az . plot_autocorr ( inference_data ); az . plot_trace ( inference_data , compact = False ); az . plot_ess ( inference_data , kind = \"evolution\" );","title":"Diagnostic plots"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#posterior-density-plots","text":"az . plot_posterior ( inference_data ) array([, ], dtype=object) print ( f \"Given the { num_trials } we observed, the posterior is centered around true underlying parameters theta_o: { theta_o } \" ) Given the 100 we observed, the posterior is centered around true underlying parameters theta_o: tensor([[1.9622, 0.7550]]) az . plot_pair ( inference_data ) az . plot_pair ( inference_data , var_names = [ \"theta\" ], kind = \"hexbin\" , marginals = True , figsize = ( 10 , 10 ), ) array([[, None], [, ]], dtype=object)","title":"Posterior density plots"},{"location":"tutorial/16_implemented_methods/","text":"API of implemented methods \u00b6 This notebook spells out the API for all algorithms implemented in the sbi toolbox: Posterior estimation (SNPE) Likelihood estimation (SNLE) Likelihood-ratio estimation (SNRE) Utilities Posterior estimation (SNPE) \u00b6 Fast \u03b5-free Inference of Simulation Models with Bayesian Conditional Density Estimation by Papamakarios & Murray (NeurIPS 2016) [PDF] [BibTeX] from sbi.inference import SNPE_A inference = SNPE_A ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x , proposal = proposal ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Automatic posterior transformation for likelihood-free inference by Greenberg, Nonnenmacher & Macke (ICML 2019) [PDF] from sbi.inference import SNPE inference = SNPE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x , proposal = proposal ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Truncated proposals for scalable and hassle-free simulation-based inference by Deistler, Goncalves & Macke (NeurIPS 2022) [Paper] from sbi.inference import SNPE from sbi.utils import get_density_thresholder , RestrictedPrior inference = SNPE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( force_first_round_loss = True ) posterior = inference . build_posterior () . 
set_default_x ( x_o ) accept_reject_fn = get_density_thresholder ( posterior , quantile = 1e-4 ) proposal = RestrictedPrior ( prior , accept_reject_fn , sample_with = \"rejection\" ) Likelihood estimation (SNLE) \u00b6 Sequential neural likelihood: Fast likelihood-free inference with autoregressive flows by Papamakarios, Sterratt & Murray (AISTATS 2019) [PDF] [BibTeX] from sbi.inference import SNLE inference = SNLE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Variational methods for simulation-based inference by Gl\u00f6ckler, Deistler, Macke (ICLR 2022) [Paper] from sbi.inference import SNLE inference = SNLE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior ( sample_with = \"vi\" , vi_method = \"fKL\" ) . set_default_x ( x_o ) proposal = posterior Flexible and efficient simulation-based inference for models of decision-making by Boelts, Lueckmann, Gao, Macke (Elife 2022) [Paper] from sbi.inference import MNLE inference = MNLE ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) Likelihood-ratio estimation (SNRE) \u00b6 Likelihood-free MCMC with Amortized Approximate Likelihood Ratios by Hermans, Begy & Louppe (ICML 2020) [PDF] from sbi.inference import SNRE_A inference = SNRE_A ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) On Contrastive Learning for Likelihood-free Inference Durkan, Murray & Papamakarios (ICML 2020) [PDF] . from sbi.inference import SNRE inference = SNRE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation by Delaunoy, Hermans, Rozet, Wehenkel & Louppe (NeurIPS 2022) [PDF] from sbi.inference import BNRE inference = BNRE ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( regularization_strength = 100. ) posterior = inference . build_posterior () . set_default_x ( x_o ) Contrastive Neural Ratio Estimation Benjamin Kurt Miller, Christoph Weniger, Patrick Forr\u00e9 (NeurIPS 2022) [PDF] # The main feature of NRE-C is producing an exact ratio of densities at optimum, even when using multiple contrastive pairs (classes). from sbi.inference import SNRE_C # Amortized inference inference = SNRE_C ( prior ) proposal = prior theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( num_classes = 5 , # SNRE_C sees `2 * num_classes - 1` marginally drawn contrastive pairs. gamma = 1.0 , # SNRE_C can control the weight between terms in its loss function. ) posterior = inference . build_posterior () . 
set_default_x ( x_o ) Utilities \u00b6 Simulation-based calibration by Talts, Betancourt, Simpson, Vehtari, Gelman (arxiv 2018) [Paper] ) from sbi.analysis import run_sbc , sbc_rank_plot thetas = prior . sample (( 1_000 ,)) xs = simulator ( thetas ) ranks , dap_samples = run_sbc ( thetas , xs , posterior , num_posterior_samples = 1_000 ) _ = sbc_rank_plot ( ranks = ranks , num_posterior_samples = num_posterior_samples , plot_type = \"hist\" , num_bins = None , ) Restriction estimator by Deistler, Macke & Goncalves (PNAS 2022) [Paper] from sbi.inference import SNPE from sbi.utils import RestrictionEstimator restriction_estimator = RestrictionEstimator ( prior = prior ) proposal = prior for _ in range ( num_rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) restriction_estimator . append_simulations ( theta , x ) classifier = restriction_estimator . train () proposal = restriction_estimator . restrict_prior () all_theta , all_x , _ = restriction_estimator . get_simulations () inference = SNPE ( prior ) density_estimator = inference . append_simulations ( all_theta , all_x ) . train () posterior = inference . build_posterior () Expected coverage (sample-based) as computed in Deistler, Goncalves, Macke (Neurips 2022) [Paper] and in Rozet, Louppe (2021) [Paper] from sbi.analysis import run_sbc , sbc_rank_plot thetas = prior . sample (( 1_000 ,)) xs = simulator ( thetas ) ranks , dap_samples = run_sbc ( thetas , xs , posterior , num_posterior_samples = 1_000 , reduce_fns = posterior . log_prob ) _ = sbc_rank_plot ( ranks = ranks , num_posterior_samples = num_posterior_samples , plot_type = \"hist\" , num_bins = None , )","title":"Implemented algorithms"},{"location":"tutorial/16_implemented_methods/#api-of-implemented-methods","text":"This notebook spells out the API for all algorithms implemented in the sbi toolbox: Posterior estimation (SNPE) Likelihood estimation (SNLE) Likelihood-ratio estimation (SNRE) Utilities","title":"API of implemented methods"},{"location":"tutorial/16_implemented_methods/#posterior-estimation-snpe","text":"Fast \u03b5-free Inference of Simulation Models with Bayesian Conditional Density Estimation by Papamakarios & Murray (NeurIPS 2016) [PDF] [BibTeX] from sbi.inference import SNPE_A inference = SNPE_A ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x , proposal = proposal ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Automatic posterior transformation for likelihood-free inference by Greenberg, Nonnenmacher & Macke (ICML 2019) [PDF] from sbi.inference import SNPE inference = SNPE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x , proposal = proposal ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Truncated proposals for scalable and hassle-free simulation-based inference by Deistler, Goncalves & Macke (NeurIPS 2022) [Paper] from sbi.inference import SNPE from sbi.utils import get_density_thresholder , RestrictedPrior inference = SNPE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( force_first_round_loss = True ) posterior = inference . build_posterior () . 
set_default_x ( x_o ) accept_reject_fn = get_density_thresholder ( posterior , quantile = 1e-4 ) proposal = RestrictedPrior ( prior , accept_reject_fn , sample_with = \"rejection\" )","title":"Posterior estimation (SNPE)"},{"location":"tutorial/16_implemented_methods/#likelihood-estimation-snle","text":"Sequential neural likelihood: Fast likelihood-free inference with autoregressive flows by Papamakarios, Sterratt & Murray (AISTATS 2019) [PDF] [BibTeX] from sbi.inference import SNLE inference = SNLE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Variational methods for simulation-based inference by Gl\u00f6ckler, Deistler, Macke (ICLR 2022) [Paper] from sbi.inference import SNLE inference = SNLE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior ( sample_with = \"vi\" , vi_method = \"fKL\" ) . set_default_x ( x_o ) proposal = posterior Flexible and efficient simulation-based inference for models of decision-making by Boelts, Lueckmann, Gao, Macke (Elife 2022) [Paper] from sbi.inference import MNLE inference = MNLE ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o )","title":"Likelihood estimation (SNLE)"},{"location":"tutorial/16_implemented_methods/#likelihood-ratio-estimation-snre","text":"Likelihood-free MCMC with Amortized Approximate Likelihood Ratios by Hermans, Begy & Louppe (ICML 2020) [PDF] from sbi.inference import SNRE_A inference = SNRE_A ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) On Contrastive Learning for Likelihood-free Inference Durkan, Murray & Papamakarios (ICML 2020) [PDF] . from sbi.inference import SNRE inference = SNRE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation by Delaunoy, Hermans, Rozet, Wehenkel & Louppe (NeurIPS 2022) [PDF] from sbi.inference import BNRE inference = BNRE ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( regularization_strength = 100. ) posterior = inference . build_posterior () . set_default_x ( x_o ) Contrastive Neural Ratio Estimation Benjamin Kurt Miller, Christoph Weniger, Patrick Forr\u00e9 (NeurIPS 2022) [PDF] # The main feature of NRE-C is producing an exact ratio of densities at optimum, even when using multiple contrastive pairs (classes). from sbi.inference import SNRE_C # Amortized inference inference = SNRE_C ( prior ) proposal = prior theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( num_classes = 5 , # SNRE_C sees `2 * num_classes - 1` marginally drawn contrastive pairs. 
gamma = 1.0 , # SNRE_C can control the weight between terms in its loss function. ) posterior = inference . build_posterior () . set_default_x ( x_o )","title":"Likelihood-ratio estimation (SNRE)"},{"location":"tutorial/16_implemented_methods/#utilities","text":"Simulation-based calibration by Talts, Betancourt, Simpson, Vehtari, Gelman (arxiv 2018) [Paper] ) from sbi.analysis import run_sbc , sbc_rank_plot thetas = prior . sample (( 1_000 ,)) xs = simulator ( thetas ) ranks , dap_samples = run_sbc ( thetas , xs , posterior , num_posterior_samples = 1_000 ) _ = sbc_rank_plot ( ranks = ranks , num_posterior_samples = num_posterior_samples , plot_type = \"hist\" , num_bins = None , ) Restriction estimator by Deistler, Macke & Goncalves (PNAS 2022) [Paper] from sbi.inference import SNPE from sbi.utils import RestrictionEstimator restriction_estimator = RestrictionEstimator ( prior = prior ) proposal = prior for _ in range ( num_rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) restriction_estimator . append_simulations ( theta , x ) classifier = restriction_estimator . train () proposal = restriction_estimator . restrict_prior () all_theta , all_x , _ = restriction_estimator . get_simulations () inference = SNPE ( prior ) density_estimator = inference . append_simulations ( all_theta , all_x ) . train () posterior = inference . build_posterior () Expected coverage (sample-based) as computed in Deistler, Goncalves, Macke (Neurips 2022) [Paper] and in Rozet, Louppe (2021) [Paper] from sbi.analysis import run_sbc , sbc_rank_plot thetas = prior . sample (( 1_000 ,)) xs = simulator ( thetas ) ranks , dap_samples = run_sbc ( thetas , xs , posterior , num_posterior_samples = 1_000 , reduce_fns = posterior . log_prob ) _ = sbc_rank_plot ( ranks = ranks , num_posterior_samples = num_posterior_samples , plot_type = \"hist\" , num_bins = None , )","title":"Utilities"},{"location":"tutorial/17_SBI_for_models_of_decision_making/","text":"SBI with mixed data, iid data, and experimental conditions \u00b6 For a general tutorial on using SBI with trial-based iid data, see tutorial 14 . Here, we cover the use-case often occurring in models of decision-making: trial-based data with mixed data types and varying experimental conditions. Trial-based SBI with mixed data types \u00b6 In some cases, models with trial-based data additionally return data with mixed data types, e.g., continous and discrete data. For example, most computational models of decision-making have continuous reaction times and discrete choices as output. This can induce a problem when performing trial-based SBI that relies on learning a neural likelihood: It is challenging for most density estimators to handle both, continuous and discrete data at the same time. However, there is a recent SBI method for solving this problem, it\u2019s called Mixed Neural Likelihood Estimation (MNLE). It works just like NLE, but with mixed data types. The trick is that it learns two separate density estimators, one for the discrete part of the data, and one for the continuous part, and combines the two to obtain the final neural likelihood. Crucially, the continuous density estimator is trained conditioned on the output of the discrete one, such that statistical dependencies between the discrete and continuous data (e.g., between choices and reaction times) are modeled as well. The interested reader is referred to the original paper available here . 
MNLE was recently added to sbi (see this PR and also issue ) and follow the same API as SNLE . In this tutorial we will show how to apply MNLE to mixed data, and how to deal with varying experimental conditions. Toy problem for MNLE \u00b6 To illustrate MNLE we set up a toy simulator that outputs mixed data and for which we know the likelihood such we can obtain reference posterior samples via MCMC. Simulator : To simulate mixed data we do the following Sample reaction time from inverse Gamma Sample choices from Binomial Return reaction time \\(rt \\in (0, \\infty)\\) and choice index \\(c \\in \\{0, 1\\}\\) \\[ c \\sim \\text{Binomial}(\\rho) \\\\ rt \\sim \\text{InverseGamma}(\\alpha=2, \\beta) \\\\ \\] Prior : The priors of the two parameters \\(\\rho\\) and \\(\\beta\\) are independent. We define a Beta prior over the probabilty parameter of the Binomial used in the simulator and a Gamma prior over the shape-parameter of the inverse Gamma used in the simulator: \\[ p(\\beta, \\rho) = p(\\beta) \\; p(\\rho) ; \\\\ p(\\beta) = \\text{Gamma}(1, 0.5) \\\\ p(\\text{probs}) = \\text{Beta}(2, 2) \\] Because the InverseGamma and the Binomial likelihoods are well-defined we can perform MCMC on this problem and obtain reference-posterior samples. import matplotlib.pyplot as plt import torch from torch import Tensor from sbi.inference import MNLE from pyro.distributions import InverseGamma from torch.distributions import Beta , Binomial , Categorical , Gamma from sbi.utils import MultipleIndependent from sbi.utils.metrics import c2st from sbi.analysis import pairplot from sbi.inference import MCMCPosterior from sbi.utils.torchutils import atleast_2d from sbi.inference.potentials.likelihood_based_potential import ( MixedLikelihoodBasedPotential , ) from sbi.utils.conditional_density_utils import ConditionedPotential from sbi.utils import mcmc_transform from sbi.inference.potentials.base_potential import BasePotential # Toy simulator for mixed data def mixed_simulator ( theta : Tensor , concentration_scaling : float = 1.0 ): \"\"\"Returns a sample from a mixed distribution given parameters theta. Args: theta: batch of parameters, shape (batch_size, 2) concentration_scaling: scaling factor for the concentration parameter of the InverseGamma distribution, mimics an experimental condition. \"\"\" beta , ps = theta [:, : 1 ], theta [:, 1 :] choices = Binomial ( probs = ps ) . sample () rts = InverseGamma ( concentration = concentration_scaling * torch . ones_like ( beta ), rate = beta ) . sample () return torch . cat (( rts , choices ), dim = 1 ) # The potential function defines the ground truth likelihood and allows us to obtain reference posterior samples via MCMC. class PotentialFunctionProvider ( BasePotential ): allow_iid_x = True # type: ignore def __init__ ( self , prior , x_o , concentration_scaling = 1.0 , device = \"cpu\" ): super () . __init__ ( prior , x_o , device ) self . concentration_scaling = concentration_scaling def __call__ ( self , theta , track_gradients : bool = True ): theta = atleast_2d ( theta ) with torch . set_grad_enabled ( track_gradients ): iid_ll = self . iid_likelihood ( theta ) return iid_ll + self . prior . log_prob ( theta ) def iid_likelihood ( self , theta ): lp_choices = torch . stack ( [ Binomial ( probs = th . reshape ( 1 , - 1 )) . log_prob ( self . x_o [:, 1 :]) for th in theta [:, 1 :] ], dim = 1 , ) lp_rts = torch . stack ( [ InverseGamma ( concentration = self . concentration_scaling * torch . ones_like ( beta_i ), rate = beta_i , ) . log_prob ( self . 
x_o [:, : 1 ]) for beta_i in theta [:, : 1 ] ], dim = 1 , ) joint_likelihood = ( lp_choices + lp_rts ) . squeeze () assert joint_likelihood . shape == torch . Size ([ self . x_o . shape [ 0 ], theta . shape [ 0 ]]) return joint_likelihood . sum ( 0 ) # Define independent prior. prior = MultipleIndependent ( [ Gamma ( torch . tensor ([ 1.0 ]), torch . tensor ([ 0.5 ])), Beta ( torch . tensor ([ 2.0 ]), torch . tensor ([ 2.0 ])), ], validate_args = False , ) Obtain reference-posterior samples via analytical likelihood and MCMC \u00b6 torch . manual_seed ( 42 ) num_trials = 10 num_samples = 1000 theta_o = prior . sample (( 1 ,)) x_o = mixed_simulator ( theta_o . repeat ( num_trials , 1 )) mcmc_kwargs = dict ( num_chains = 20 , warmup_steps = 50 , method = \"slice_np_vectorized\" , init_strategy = \"proposal\" , ) true_posterior = MCMCPosterior ( potential_fn = PotentialFunctionProvider ( prior , x_o ), proposal = prior , theta_transform = mcmc_transform ( prior , enable_transform = True ), ** mcmc_kwargs , ) true_samples = true_posterior . sample (( num_samples ,)) /Users/janbolts/qode/sbi/sbi/utils/sbiutils.py:342: UserWarning: An x with a batch size of 10 was passed. It will be interpreted as a batch of independent and identically distributed data X={x_1, ..., x_n}, i.e., data generated based on the same underlying (unknown) parameter. The resulting posterior will be with respect to entire batch, i.e,. p(theta | X). warnings.warn( Running vectorized MCMC with 20 chains: 0%| | 0/20000 [00:00 1 . For SNLE or SNRE, MCMC sampling is required, which is computationally expensive. With SNVI (sequential neural variational inference), it is possible to directly sample from the posterior without any corrections during training or without expensive MCMC for sampling. This is possible by learning the posterior with variational inference techniques. For this, an additional network (one for the likelihood or likelihood-to-evidence-ratio) must be trained first. Main syntax \u00b6 inference = SNLE ( prior ) for _ in range ( num_rounds ): theta , x = simulate_for_sbi ( simulator , proposal , num_simulations = 500 ) # In `SNLE` and `SNRE`, you should not pass the `proposal` to ` # .append_simulations()`. likelihood_estimator = inference . append_simulations ( theta , x , ) . train () # Obtain potential (learned likelihood * prior) and theta transformation. potential_fn , theta_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) # Build posterior via variational inference. posterior = VIPosterior ( potential_fn , prior , \"maf\" , theta_transform , vi_method = \"fKL\" , ) . train () proposal = posterior Linear Gaussian example \u00b6 Below, we give a full example of inferring the posterior distribution with SNVI over multiple rounds. For this, we take the same example as in the previous tutorial. import torch from sbi.inference import ( likelihood_estimator_based_potential , SNLE , prepare_for_sbi , simulate_for_sbi , VIPosterior , ) from sbi import utils as utils from sbi import analysis as analysis _ = torch . manual_seed ( 0 ) num_dim = 3 prior = utils . BoxUniform ( low =- 2 * torch . ones ( num_dim ), high = 2 * torch . ones ( num_dim )) def linear_gaussian ( theta ): return theta + 1.0 + torch . randn_like ( theta ) * 0.1 simulator , prior = prepare_for_sbi ( linear_gaussian , prior ) Here, we decide to learn the likelihood directly, but learning the likelihood-to-evidence ratio would also be a viable option. 
inference = SNLE ( prior = prior ) Now we can run inference, where we first learn the likelihood, which is then in turn used to learn a posterior through variational inference. num_rounds = 2 x_o = torch . zeros ( 3 ,) posteriors = [] proposal = prior for _ in range ( num_rounds ): theta , x = simulate_for_sbi ( simulator , proposal , num_simulations = 500 ) likelihood_estimator = inference . append_simulations ( theta , x , ) . train () potential_fn , theta_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) posterior = VIPosterior ( potential_fn , prior , \"maf\" , theta_transform , vi_method = \"fKL\" , ) . train () posteriors . append ( posterior ) proposal = posterior Running 500 simulations.: 0%| | 0/500 [00:00 1 . For SNLE or SNRE, MCMC sampling is required, which is computationally expensive. With SNVI (sequential neural variational inference), it is possible to directly sample from the posterior without any corrections during training or without expensive MCMC for sampling. This is possible by learning the posterior with variational inference techniques. For this, an additional network (one for the likelihood or likelihood-to-evidence-ratio) must be trained first.","title":"Using Variational Inference for Building Posteriors"},{"location":"tutorial/17_vi_posteriors/#main-syntax","text":"inference = SNLE ( prior ) for _ in range ( num_rounds ): theta , x = simulate_for_sbi ( simulator , proposal , num_simulations = 500 ) # In `SNLE` and `SNRE`, you should not pass the `proposal` to ` # .append_simulations()`. likelihood_estimator = inference . append_simulations ( theta , x , ) . train () # Obtain potential (learned likelihood * prior) and theta transformation. potential_fn , theta_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) # Build posterior via variational inference. posterior = VIPosterior ( potential_fn , prior , \"maf\" , theta_transform , vi_method = \"fKL\" , ) . train () proposal = posterior","title":"Main syntax"},{"location":"tutorial/17_vi_posteriors/#linear-gaussian-example","text":"Below, we give a full example of inferring the posterior distribution with SNVI over multiple rounds. For this, we take the same example as in the previous tutorial. import torch from sbi.inference import ( likelihood_estimator_based_potential , SNLE , prepare_for_sbi , simulate_for_sbi , VIPosterior , ) from sbi import utils as utils from sbi import analysis as analysis _ = torch . manual_seed ( 0 ) num_dim = 3 prior = utils . BoxUniform ( low =- 2 * torch . ones ( num_dim ), high = 2 * torch . ones ( num_dim )) def linear_gaussian ( theta ): return theta + 1.0 + torch . randn_like ( theta ) * 0.1 simulator , prior = prepare_for_sbi ( linear_gaussian , prior ) Here, we decide to learn the likelihood directly, but learning the likelihood-to-evidence ratio would also be a viable option. inference = SNLE ( prior = prior ) Now we can run inference, where we first learn the likelihood, which is then in turn used to learn a posterior through variational inference. num_rounds = 2 x_o = torch . zeros ( 3 ,) posteriors = [] proposal = prior for _ in range ( num_rounds ): theta , x = simulate_for_sbi ( simulator , proposal , num_simulations = 500 ) likelihood_estimator = inference . append_simulations ( theta , x , ) . 
train () potential_fn , theta_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) posterior = VIPosterior ( potential_fn , prior , \"maf\" , theta_transform , vi_method = \"fKL\" , ) . train () posteriors . append ( posterior ) proposal = posterior Running 500 simulations.: 0%| | 0/500 [00:00 NeuralPosterior : r \"\"\"Runs simulation-based inference and returns the posterior. This function provides a simple interface to run sbi. Inference is run for a single round and hence the returned posterior $p(\\theta|x)$ can be sampled and evaluated for any $x$ (i.e. it is amortized). The scope of this function is limited to the most essential features of sbi. For more flexibility (e.g. multi-round inference, different density estimators) please use the flexible interface described here: https://www.mackelab.org/sbi/tutorial/02_flexible_interface/ Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\mathrm{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. method: What inference method to use. Either of SNPE, SNLE or SNRE. num_simulations: Number of simulation calls. More simulations means a longer runtime, but a better posterior estimate. num_workers: Number of parallel workers to use for simulations. Returns: Posterior over parameters conditional on observations (amortized). \"\"\" try : method_fun : Callable = getattr ( sbi . inference , method . upper ()) except AttributeError : raise NameError ( \"Method not available. `method` must be one of 'SNPE', 'SNLE', 'SNRE'.\" ) simulator , prior = prepare_for_sbi ( simulator , prior ) inference = method_fun ( prior = prior ) theta , x = simulate_for_sbi ( simulator = simulator , proposal = prior , num_simulations = num_simulations , num_workers = num_workers , ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () return posterior sbi . utils . user_input_checks . prepare_for_sbi ( simulator , prior ) \u00b6 Prepare simulator and prior for usage in sbi. NOTE: This is a wrapper around process_prior and process_simulator which can be used in isolation as well. Attempts to meet the following requirements by reshaping and type-casting: the simulator function receives as input and returns a Tensor. the simulator can simulate batches of parameters and return batches of data. the prior does not produce batches and samples and evaluates to Tensor. the output shape is a torch.Size((1,N)) (i.e, has a leading batch dimension 1). If this is not possible, a suitable exception will be raised. Parameters: Name Type Description Default simulator Callable Simulator as provided by the user. required prior Prior as provided by the user. required Returns: Type Description Tuple[Callable, torch.distributions.distribution.Distribution] Tuple (simulator, prior) checked and matching the requirements of sbi. Source code in sbi/utils/user_input_checks.py def prepare_for_sbi ( simulator : Callable , prior ) -> Tuple [ Callable , Distribution ]: \"\"\"Prepare simulator and prior for usage in sbi. NOTE: This is a wrapper around `process_prior` and `process_simulator` which can be used in isolation as well. 
Attempts to meet the following requirements by reshaping and type-casting: - the simulator function receives a Tensor as input and returns a Tensor.
    - the simulator can simulate batches of parameters and return batches of data.
    - the prior does not produce batches, and its samples and log-prob evaluations are Tensors.
    - the output shape is a `torch.Size((1,N))` (i.e, has a leading batch dimension 1). If this is not possible, a suitable exception will be raised. Args: simulator: Simulator as provided by the user. prior: Prior as provided by the user. Returns: Tuple (simulator, prior) checked and matching the requirements of sbi. \"\"\" # Check prior, return PyTorch prior. prior , _ , prior_returns_numpy = process_prior ( prior ) # Check simulator, returns PyTorch simulator able to simulate batches. simulator = process_simulator ( simulator , prior , prior_returns_numpy ) # Consistency check after making ready for sbi. check_sbi_inputs ( simulator , prior ) return simulator , prior sbi . inference . base . simulate_for_sbi ( simulator , proposal , num_simulations , num_workers = 1 , simulation_batch_size = 1 , seed = None , show_progress_bar = True ) \u00b6 Returns ( \\(\\theta, x\\) ) pairs obtained from sampling the proposal and simulating. This function performs two steps: Sample parameters \\(\\theta\\) from the proposal . Simulate these parameters to obtain \\(x\\) . Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\text{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required proposal Any Probability distribution that the parameters \\(\\theta\\) are sampled from. required num_simulations int Number of simulations that are run. required num_workers int Number of parallel workers to use for simulations. 1 simulation_batch_size int Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 1 seed Optional[int] Seed for reproducibility. None show_progress_bar bool Whether to show a progress bar for simulating. This will not affect whether there will be a progressbar while drawing samples from the proposal. True Returns: Sampled parameters \\(\\theta\\) and simulation-outputs \\(x\\) . Source code in sbi/inference/base.py def simulate_for_sbi ( simulator : Callable , proposal : Any , num_simulations : int , num_workers : int = 1 , simulation_batch_size : int = 1 , seed : Optional [ int ] = None , show_progress_bar : bool = True , ) -> Tuple [ Tensor , Tensor ]: r \"\"\"Returns ($\\theta, x$) pairs obtained from sampling the proposal and simulating. This function performs two steps: - Sample parameters $\\theta$ from the `proposal`. - Simulate these parameters to obtain $x$. Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\text{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. proposal: Probability distribution that the parameters $\\theta$ are sampled from. num_simulations: Number of simulations that are run. num_workers: Number of parallel workers to use for simulations. simulation_batch_size: Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). seed: Seed for reproducibility. show_progress_bar: Whether to show a progress bar for simulating. This will not affect whether there will be a progressbar while drawing samples from the proposal. 
Returns: Sampled parameters $\\theta$ and simulation-outputs $x$. \"\"\" theta = proposal . sample (( num_simulations ,)) x = simulate_in_batches ( simulator = simulator , theta = theta , sim_batch_size = simulation_batch_size , num_workers = num_workers , seed = seed , show_progress_bars = show_progress_bar , ) return theta , x sbi.inference.snpe.snpe_a.SNPE_A ( PosteriorEstimator ) \u00b6 __init__ ( self , prior = None , density_estimator = 'mdn_snpe_a' , num_components = 10 , device = 'cpu' , logging_level = 'WARNING' , summary_writer = None , show_progress_bars = True ) special \u00b6 SNPE-A [1]. [1] Fast epsilon-free Inference of Simulation Models with Bayesian Conditional Density Estimation , Papamakarios et al., NeurIPS 2016, https://arxiv.org/abs/1605.06376 . This class implements SNPE-A. SNPE-A trains across multiple rounds with a maximum-likelihood-loss. This will make training converge to the proposal posterior instead of the true posterior. To correct for this, SNPE-A applies a post-hoc correction after training. This correction has to be performed analytically. Thus, SNPE-A is limited to Gaussian distributions for all but the last round. In the last round, SNPE-A can use a Mixture of Gaussians. Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. None density_estimator Union[str, Callable] If it is a string (only \u201cmdn_snpe_a\u201d is valid), use a pre-configured mixture of densities network. Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the density estimator. The density estimator needs to provide the methods .log_prob and .sample() . Note that until the last round only a single (multivariate) Gaussian component is used for training (see Algorithm 1 in [1]). In the last round, this component is replicated num_components times, its parameters are perturbed with a very small noise, and then the last training round is done with the expanded Gaussian mixture as estimator for the proposal posterior. 'mdn_snpe_a' num_components int Number of components of the mixture of Gaussians in the last round. This overrides the num_components value passed to posterior_nn() . 10 device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'WARNING' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during training. True Source code in sbi/inference/snpe/snpe_a.py def __init__ ( self , prior : Optional [ Distribution ] = None , density_estimator : Union [ str , Callable ] = \"mdn_snpe_a\" , num_components : int = 10 , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"WARNING\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"SNPE-A [1]. 
[1] _Fast epsilon-free Inference of Simulation Models with Bayesian Conditional Density Estimation_, Papamakarios et al., NeurIPS 2016, https://arxiv.org/abs/1605.06376. This class implements SNPE-A. SNPE-A trains across multiple rounds with a maximum-likelihood-loss. This will make training converge to the proposal posterior instead of the true posterior. To correct for this, SNPE-A applies a post-hoc correction after training. This correction has to be performed analytically. Thus, SNPE-A is limited to Gaussian distributions for all but the last round. In the last round, SNPE-A can use a Mixture of Gaussians. Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. density_estimator: If it is a string (only \"mdn_snpe_a\" is valid), use a pre-configured mixture of densities network. Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the density estimator. The density estimator needs to provide the methods `.log_prob` and `.sample()`. Note that until the last round only a single (multivariate) Gaussian component is used for training (see Algorithm 1 in [1]). In the last round, this component is replicated `num_components` times, its parameters are perturbed with a very small noise, and then the last training round is done with the expanded Gaussian mixture as estimator for the proposal posterior. num_components: Number of components of the mixture of Gaussians in the last round. This overrides the `num_components` value passed to `posterior_nn()`. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during training. \"\"\" # Catch invalid inputs. if not (( density_estimator == \"mdn_snpe_a\" ) or callable ( density_estimator )): raise TypeError ( \"The `density_estimator` passed to SNPE_A needs to be a \" \"callable or the string 'mdn_snpe_a'!\" ) # `num_components` will be used to replicate the Gaussian in the last round. self . _num_components = num_components self . _ran_final_round = False # WARNING: sneaky trick ahead. We proxy the parent's `train` here, # requiring the signature to have `num_atoms`, save it for use below, and # continue. It's sneaky because we are using the object (self) as a namespace # to pass arguments between functions, and that's implicit state management. kwargs = utils . del_entries ( locals (), entries = ( \"self\" , \"__class__\" , \"num_components\" ), ) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , proposal = None , exclude_invalid_x = None , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. 
Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required proposal Optional[sbi.inference.posteriors.direct_posterior.DirectPosterior] The distribution that the parameters \\(\\theta\\) were sampled from. Pass None if the parameters were sampled from the prior. If not None , it will trigger a different loss-function. None exclude_invalid_x Optional[bool] Whether invalid simulations are discarded during training. For single-round SNPE, it is fine to discard invalid simulations, but for multi-round SNPE (atomic), discarding invalid simulations gives systematically wrong results. If None , it will be True in the first round and False in later rounds. None data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description PosteriorEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snpe/snpe_a.py def append_simulations ( self , theta : Tensor , x : Tensor , proposal : Optional [ DirectPosterior ] = None , exclude_invalid_x : Optional [ bool ] = None , data_device : Optional [ str ] = None , ) -> \"PosteriorEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. proposal: The distribution that the parameters $\\theta$ were sampled from. Pass `None` if the parameters were sampled from the prior. If not `None`, it will trigger a different loss-function. exclude_invalid_x: Whether invalid simulations are discarded during training. For single-round SNPE, it is fine to discard invalid simulations, but for multi-round SNPE (atomic), discarding invalid simulations gives systematically wrong results. If `None`, it will be `True` in the first round and `False` in later rounds. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" if ( proposal is None or proposal is self . _prior or ( isinstance ( proposal , RestrictedPrior ) and proposal . _prior is self . _prior ) ): # The `_data_round_index` will later be used to infer if one should train # with MLE loss or with atomic loss (see, in `train()`: # self._round = max(self._data_round_index)) current_round = 0 else : if not self . _data_round_index : # This catches a pretty specific case: if, in the first round, one # passes data that does not come from the prior. current_round = 1 else : current_round = max ( self . _data_round_index ) + 1 if exclude_invalid_x is None : if current_round == 0 : exclude_invalid_x = True else : exclude_invalid_x = False if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . 
_device ) is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x = exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) if ( type ( self ) . __name__ == \"SNPE_C\" and current_round > 0 and not self . use_non_atomic_loss ): nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"Multiround SNPE-C (atomic)\" , ) else : npe_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"Single-round NPE\" ) self . _check_proposal ( proposal ) self . _data_round_index . append ( current_round ) prior_masks = mask_sims_from_prior ( int ( current_round > 0 ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _proposal_roundwise . append ( proposal ) if self . _prior is None or isinstance ( self . _prior , ImproperEmpirical ): if proposal is not None : raise ValueError ( \"You had not passed a prior at initialization, but now you \" \"passed a proposal. If you want to run multi-round SNPE, you have \" \"to specify a prior (set the `.prior` argument or re-initialize \" \"the object with a prior distribution). If the samples you passed \" \"to `append_simulations()` were sampled from the prior, you can \" \"run single-round inference with \" \"`append_simulations(..., proposal=None)`.\" ) theta_prior = self . get_simulations ()[ 0 ] . to ( self . _device ) self . _prior = ImproperEmpirical ( theta_prior , ones ( theta_prior . shape [ 0 ], device = self . _device ) ) return self build_posterior ( self , density_estimator = None , prior = None ) \u00b6 Build posterior from the neural density estimator. This method first corrects the estimated density with correct_for_proposal and then returns a DirectPosterior . Parameters: Name Type Description Default density_estimator Optional[Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None Returns: Type Description DirectPosterior Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods. Source code in sbi/inference/snpe/snpe_a.py def build_posterior ( self , density_estimator : Optional [ TorchModule ] = None , prior : Optional [ Distribution ] = None , ) -> \"DirectPosterior\" : r \"\"\"Build posterior from the neural density estimator. This method first corrects the estimated density with `correct_for_proposal` and then returns a `DirectPosterior`. Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods. \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNPE_A(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior wrapped_density_estimator = self . correct_for_proposal ( density_estimator = density_estimator ) self . _posterior = DirectPosterior ( posterior_estimator = wrapped_density_estimator , # type: ignore prior = prior , ) return deepcopy ( self . _posterior ) correct_for_proposal ( self , density_estimator = None ) \u00b6 Build mixture of Gaussians that approximates the posterior. 
Returns a SNPE_A_MDN object, which applies the posthoc-correction required in SNPE-A. Parameters: Name Type Description Default density_estimator Optional[Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None Returns: Type Description SNPE_A_MDN Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods. Source code in sbi/inference/snpe/snpe_a.py def correct_for_proposal ( self , density_estimator : Optional [ TorchModule ] = None , ) -> \"SNPE_A_MDN\" : r \"\"\"Build mixture of Gaussians that approximates the posterior. Returns a `SNPE_A_MDN` object, which applies the posthoc-correction required in SNPE-A. Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods. \"\"\" if density_estimator is None : density_estimator = deepcopy ( self . _neural_net ) # PosteriorEstimator.train() also returns a deepcopy, mimic this here # If internal net is used device is defined. device = self . _device else : # Otherwise, infer it from the device of the net parameters. device = str ( next ( density_estimator . parameters ()) . device ) # Set proposal of the density estimator. # This also evokes the z-scoring correction if necessary. if ( self . _proposal_roundwise [ - 1 ] is self . _prior or self . _proposal_roundwise [ - 1 ] is None ): proposal = self . _prior assert isinstance ( proposal , ( MultivariateNormal , utils . BoxUniform ) ), \"\"\"Prior must be `torch.distributions.MultivariateNormal` or `sbi.utils. BoxUniform`\"\"\" else : assert isinstance ( self . _proposal_roundwise [ - 1 ], DirectPosterior ), \"\"\"The proposal you passed to `append_simulations` is neither the prior nor a `DirectPosterior`. SNPE-A currently only supports these scenarios. \"\"\" proposal = self . _proposal_roundwise [ - 1 ] # Create the SNPE_A_MDN wrapped_density_estimator = SNPE_A_MDN ( flow = density_estimator , # type: ignore proposal = proposal , prior = self . _prior , device = device , ) return wrapped_density_estimator get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snpe/snpe_a.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. 
resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. Source code in sbi/inference/snpe/snpe_a.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . 
_data_round_index , starting_round ) return theta , x , prior_masks train ( self , final_round = False , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , calibration_kernel = None , resume_training = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None , component_perturbation = 0.005 ) \u00b6 Return density estimator that approximates the proposal posterior. [1] Fast epsilon-free Inference of Simulation Models with Bayesian Conditional Density Estimation , Papamakarios et al., NeurIPS 2016, https://arxiv.org/abs/1605.06376 . Training is performed with maximum likelihood on samples from the latest round, which leads the algorithm to converge to the proposal posterior. Parameters: Name Type Description Default final_round bool Whether we are in the last round of training or not. For all but the last round, Algorithm 1 from [1] is executed. In last the round, Algorithm 2 from [1] is executed once. False training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 calibration_kernel Optional[Callable] A function to calibrate the loss with respect to the simulations x . See Lueckmann, Gon\u00e7alves et al., NeurIPS 2017. None resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False force_first_round_loss If True , train with maximum likelihood, i.e., potentially ignoring the correction for using a proposal distribution different from the prior. required retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. Not supported for SNPE-A. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None component_perturbation float The standard deviation applied to all weights and biases when, in the last round, the Mixture of Gaussians is build from a single Gaussian. This value can be problem-specific and also depends on the number of mixture components. 0.005 Returns: Type Description Module Density estimator that approximates the distribution \\(p(\\theta|x)\\) . 
Source code in sbi/inference/snpe/snpe_a.py def train ( self , final_round : bool = False , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , calibration_kernel : Optional [ Callable ] = None , resume_training : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , component_perturbation : float = 5e-3 , ) -> nn . Module : r \"\"\"Return density estimator that approximates the proposal posterior. [1] _Fast epsilon-free Inference of Simulation Models with Bayesian Conditional Density Estimation_, Papamakarios et al., NeurIPS 2016, https://arxiv.org/abs/1605.06376. Training is performed with maximum likelihood on samples from the latest round, which leads the algorithm to converge to the proposal posterior. Args: final_round: Whether we are in the last round of training or not. For all but the last round, Algorithm 1 from [1] is executed. In last the round, Algorithm 2 from [1] is executed once. training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. calibration_kernel: A function to calibrate the loss with respect to the simulations `x`. See Lueckmann, Gon\u00e7alves et al., NeurIPS 2017. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. force_first_round_loss: If `True`, train with maximum likelihood, i.e., potentially ignoring the correction for using a proposal distribution different from the prior. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. Not supported for SNPE-A. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) component_perturbation: The standard deviation applied to all weights and biases when, in the last round, the Mixture of Gaussians is build from a single Gaussian. This value can be problem-specific and also depends on the number of mixture components. Returns: Density estimator that approximates the distribution $p(\\theta|x)$. \"\"\" assert not retrain_from_scratch , \"\"\"Retraining from scratch is not supported in SNPE-A yet. The reason for this is that, if we reininitialized the density estimator, the z-scoring would change, which would break the posthoc correction. This is a pure implementation issue.\"\"\" kwargs = utils . del_entries ( locals (), entries = ( \"self\" , \"__class__\" , \"final_round\" , \"component_perturbation\" , ), ) # SNPE-A always discards the prior samples. 
kwargs [ \"discard_prior_samples\" ] = True kwargs [ \"force_first_round_loss\" ] = True self . _round = max ( self . _data_round_index ) if final_round : # If there is (will be) only one round, train with Algorithm 2 from [1]. if self . _round == 0 : self . _build_neural_net = partial ( self . _build_neural_net , num_components = self . _num_components ) # Run Algorithm 2 from [1]. elif not self . _ran_final_round : # Now switch to the specified number of components. This method will # only be used if `retrain_from_scratch=True`. Otherwise, # the MDN will be built from replicating the single-component net for # `num_component` times (via `_expand_mog()`). self . _build_neural_net = partial ( self . _build_neural_net , num_components = self . _num_components ) # Extend the MDN to the originally desired number of components. self . _expand_mog ( eps = component_perturbation ) else : warnings . warn ( \"You have already run SNPE-A with `final_round=True`. Running it\" \"again with this setting will not allow computing the posthoc\" \"correction applied in SNPE-A. Thus, you will get an error when \" \"calling `.build_posterior()` after training.\" , UserWarning , ) else : # Run Algorithm 1 from [1]. # Wrap the function that builds the MDN such that we can make # sure that there is only one component when running. self . _build_neural_net = partial ( self . _build_neural_net , num_components = 1 ) if final_round : self . _ran_final_round = True return super () . train ( ** kwargs ) sbi.inference.snpe.snpe_c.SNPE_C ( PosteriorEstimator ) \u00b6 __init__ ( self , prior = None , density_estimator = 'maf' , device = 'cpu' , logging_level = 'WARNING' , summary_writer = None , show_progress_bars = True ) special \u00b6 SNPE-C / APT [1]. [1] Automatic Posterior Transformation for Likelihood-free Inference , Greenberg et al., ICML 2019, https://arxiv.org/abs/1905.07488 . This class implements two loss variants of SNPE-C: the non-atomic and the atomic version. The atomic loss of SNPE-C can be used for any density estimator, i.e. also for normalizing flows. However, it suffers from leakage issues. On the other hand, the non-atomic loss can only be used only if the proposal distribution is a mixture of Gaussians, the density estimator is a mixture of Gaussians, and the prior is either Gaussian or Uniform. It does not suffer from leakage issues. At the beginning of each round, we print whether the non-atomic or the atomic version is used. In this codebase, we will automatically switch to the non-atomic loss if the following criteria are fulfilled: - proposal is a DirectPosterior with density_estimator mdn , as built with utils.sbi.posterior_nn() . - the density estimator is a mdn , as built with utils.sbi.posterior_nn() . - isinstance(prior, MultivariateNormal) (from torch.distributions ) or isinstance(prior, sbi.utils.BoxUniform) Note that custom implementations of any of these densities (or estimators) will not trigger the non-atomic loss, and the algorithm will fall back onto using the atomic loss. Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. None density_estimator Union[str, Callable] If it is a string, use a pre-configured network of the provided type (one of nsf, maf, mdn, made). Alternatively, a function that builds a custom neural network can be provided. 
The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the density estimator. The density estimator needs to provide the methods .log_prob and .sample() . 'maf' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'WARNING' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during training. True Source code in sbi/inference/snpe/snpe_c.py def __init__ ( self , prior : Optional [ Distribution ] = None , density_estimator : Union [ str , Callable ] = \"maf\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"WARNING\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"SNPE-C / APT [1]. [1] _Automatic Posterior Transformation for Likelihood-free Inference_, Greenberg et al., ICML 2019, https://arxiv.org/abs/1905.07488. This class implements two loss variants of SNPE-C: the non-atomic and the atomic version. The atomic loss of SNPE-C can be used for any density estimator, i.e. also for normalizing flows. However, it suffers from leakage issues. On the other hand, the non-atomic loss can only be used only if the proposal distribution is a mixture of Gaussians, the density estimator is a mixture of Gaussians, and the prior is either Gaussian or Uniform. It does not suffer from leakage issues. At the beginning of each round, we print whether the non-atomic or the atomic version is used. In this codebase, we will automatically switch to the non-atomic loss if the following criteria are fulfilled:
    - proposal is a `DirectPosterior` with density_estimator `mdn`, as built with `sbi.utils.posterior_nn()`.
    - the density estimator is an `mdn`, as built with `sbi.utils.posterior_nn()`.
    - `isinstance(prior, MultivariateNormal)` (from `torch.distributions`) or `isinstance(prior, sbi.utils.BoxUniform)` Note that custom implementations of any of these densities (or estimators) will not trigger the non-atomic loss, and the algorithm will fall back onto using the atomic loss. Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. density_estimator: If it is a string, use a pre-configured network of the provided type (one of nsf, maf, mdn, made). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the density estimator. The density estimator needs to provide the methods `.log_prob` and `.sample()`. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during training. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , proposal = None , exclude_invalid_x = None , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required proposal Optional[sbi.inference.posteriors.direct_posterior.DirectPosterior] The distribution that the parameters \\(\\theta\\) were sampled from. Pass None if the parameters were sampled from the prior. If not None , it will trigger a different loss-function. None exclude_invalid_x Optional[bool] Whether invalid simulations are discarded during training. For single-round SNPE, it is fine to discard invalid simulations, but for multi-round SNPE (atomic), discarding invalid simulations gives systematically wrong results. If None , it will be True in the first round and False in later rounds. None data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description PosteriorEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snpe/snpe_c.py def append_simulations ( self , theta : Tensor , x : Tensor , proposal : Optional [ DirectPosterior ] = None , exclude_invalid_x : Optional [ bool ] = None , data_device : Optional [ str ] = None , ) -> \"PosteriorEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. 
Args: theta: Parameter sets. x: Simulation outputs. proposal: The distribution that the parameters $\\theta$ were sampled from. Pass `None` if the parameters were sampled from the prior. If not `None`, it will trigger a different loss-function. exclude_invalid_x: Whether invalid simulations are discarded during training. For single-round SNPE, it is fine to discard invalid simulations, but for multi-round SNPE (atomic), discarding invalid simulations gives systematically wrong results. If `None`, it will be `True` in the first round and `False` in later rounds. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" if ( proposal is None or proposal is self . _prior or ( isinstance ( proposal , RestrictedPrior ) and proposal . _prior is self . _prior ) ): # The `_data_round_index` will later be used to infer if one should train # with MLE loss or with atomic loss (see, in `train()`: # self._round = max(self._data_round_index)) current_round = 0 else : if not self . _data_round_index : # This catches a pretty specific case: if, in the first round, one # passes data that does not come from the prior. current_round = 1 else : current_round = max ( self . _data_round_index ) + 1 if exclude_invalid_x is None : if current_round == 0 : exclude_invalid_x = True else : exclude_invalid_x = False if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x = exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) if ( type ( self ) . __name__ == \"SNPE_C\" and current_round > 0 and not self . use_non_atomic_loss ): nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"Multiround SNPE-C (atomic)\" , ) else : npe_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"Single-round NPE\" ) self . _check_proposal ( proposal ) self . _data_round_index . append ( current_round ) prior_masks = mask_sims_from_prior ( int ( current_round > 0 ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _proposal_roundwise . append ( proposal ) if self . _prior is None or isinstance ( self . _prior , ImproperEmpirical ): if proposal is not None : raise ValueError ( \"You had not passed a prior at initialization, but now you \" \"passed a proposal. If you want to run multi-round SNPE, you have \" \"to specify a prior (set the `.prior` argument or re-initialize \" \"the object with a prior distribution). If the samples you passed \" \"to `append_simulations()` were sampled from the prior, you can \" \"run single-round inference with \" \"`append_simulations(..., proposal=None)`.\" ) theta_prior = self . get_simulations ()[ 0 ] . to ( self . _device ) self . _prior = ImproperEmpirical ( theta_prior , ones ( theta_prior . shape [ 0 ], device = self . 
_device ) ) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'rejection' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. For SNPE, the posterior distribution that is returned here implements the following functionality over the raw neural density estimator: - correct the calculation of the log probability such that it compensates for the leakage. - reject samples that lie outside of the prior bounds. - alternatively, if leakage is very high (which can happen for multi-round SNPE), sample from the posterior with MCMC. Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'rejection' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior or DirectPosterior . By default, DirectPosterior is used. Only if rejection_sampling_parameters contains proposal , a RejectionPosterior is instantiated. {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior, sbi.inference.posteriors.direct_posterior.DirectPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snpe/snpe_c.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"rejection\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior , DirectPosterior ]: r \"\"\"Build posterior from the neural density estimator. For SNPE, the posterior distribution that is returned here implements the following functionality over the raw neural density estimator: - correct the calculation of the log probability such that it compensates for the leakage. - reject samples that lie outside of the prior bounds. - alternatively, if leakage is very high (which can happen for multi-round SNPE), sample from the posterior with MCMC. Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. 
Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior` or `DirectPosterior`. By default, `DirectPosterior` is used. Only if `rejection_sampling_parameters` contains `proposal`, a `RejectionPosterior` is instantiated. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert self . _prior is not None , ( \"You did not pass a prior. You have to pass the prior either at \" \"initialization `inference = SNPE(prior)` or to \" \"`.build_posterior(prior=prior)`.\" ) prior = self . _prior else : utils . check_prior ( prior ) if density_estimator is None : posterior_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : posterior_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = posterior_estimator_based_potential ( posterior_estimator = posterior_estimator , prior = prior , x_o = None , ) if sample_with == \"rejection\" : if \"proposal\" in rejection_sampling_parameters . keys (): self . _posterior = RejectionPosterior ( potential_fn = potential_fn , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) else : self . _posterior = DirectPosterior ( posterior_estimator = posterior_estimator , # type: ignore prior = prior , x_shape = self . _x_shape , device = device , ) elif sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). 
None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snpe/snpe_c.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. Source code in sbi/inference/snpe/snpe_c.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. 
Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , num_atoms = 10 , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , calibration_kernel = None , resume_training = False , force_first_round_loss = False , discard_prior_samples = False , use_combined_loss = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) \u00b6 Return density estimator that approximates the distribution \\(p(\\theta|x)\\) . Parameters: Name Type Description Default num_atoms int Number of atoms to use for classification. 10 training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 calibration_kernel Optional[Callable] A function to calibrate the loss with respect to the simulations x . See Lueckmann, Gon\u00e7alves et al., NeurIPS 2017. None resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False force_first_round_loss bool If True , train with maximum likelihood, i.e., potentially ignoring the correction for using a proposal distribution different from the prior. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False use_combined_loss bool Whether to train the neural net also on prior samples using maximum likelihood in addition to training it on all samples using atomic loss. The extra MLE loss helps prevent density leaking with bounded priors. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Module Density estimator that approximates the distribution \\(p(\\theta|x)\\) . 
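A minimal usage sketch tying together the `append_simulations()`, `train()`, and `build_posterior()` calls documented above. The two-parameter `prior`, the toy `simulator`, and the observation `x_o` are illustrative assumptions, not part of this reference:

```python
import torch
from sbi.inference import SNPE_C
from sbi.utils import BoxUniform

# Toy setup (assumptions): a 2-d parameter space and a trivial noisy simulator.
prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))

def simulator(theta):
    return theta + 0.1 * torch.randn_like(theta)

x_o = torch.zeros(1, 2)

inference = SNPE_C(prior=prior, density_estimator="maf")

proposal = prior
for round_idx in range(2):
    theta = proposal.sample((500,))
    x = simulator(theta)
    # Round 0: parameters come from the prior, so `proposal=None`.
    # Later rounds: pass the previous posterior, which triggers the SNPE-C loss.
    estimator = inference.append_simulations(
        theta, x, proposal=None if round_idx == 0 else proposal
    ).train(num_atoms=10)
    posterior = inference.build_posterior(estimator)
    proposal = posterior.set_default_x(x_o)

samples = posterior.sample((1000,), x=x_o)
```

With `density_estimator="mdn"` and a `BoxUniform` (or multivariate Gaussian) prior, the criteria listed above are fulfilled and later rounds use the non-atomic loss instead of the atomic one.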
Source code in sbi/inference/snpe/snpe_c.py def train ( self , num_atoms : int = 10 , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , calibration_kernel : Optional [ Callable ] = None , resume_training : bool = False , force_first_round_loss : bool = False , discard_prior_samples : bool = False , use_combined_loss : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> nn . Module : r \"\"\"Return density estimator that approximates the distribution $p(\\theta|x)$. Args: num_atoms: Number of atoms to use for classification. training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. calibration_kernel: A function to calibrate the loss with respect to the simulations `x`. See Lueckmann, Gon\u00e7alves et al., NeurIPS 2017. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. force_first_round_loss: If `True`, train with maximum likelihood, i.e., potentially ignoring the correction for using a proposal distribution different from the prior. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. use_combined_loss: Whether to train the neural net also on prior samples using maximum likelihood in addition to training it on all samples using atomic loss. The extra MLE loss helps prevent density leaking with bounded priors. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Density estimator that approximates the distribution $p(\\theta|x)$. \"\"\" # WARNING: sneaky trick ahead. We proxy the parent's `train` here, # requiring the signature to have `num_atoms`, save it for use below, and # continue. It's sneaky because we are using the object (self) as a namespace # to pass arguments between functions, and that's implicit state management. self . _num_atoms = num_atoms self . _use_combined_loss = use_combined_loss kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" , \"num_atoms\" , \"use_combined_loss\" ), ) self . _round = max ( self . _data_round_index ) if self . _round > 0 : # Set the proposal to the last proposal that was passed by the user. For # atomic SNPE, it does not matter what the proposal is. 
For non-atomic # SNPE, we only use the latest data that was passed, i.e. the one from the # last proposal. proposal = self . _proposal_roundwise [ - 1 ] self . use_non_atomic_loss = ( isinstance ( proposal , DirectPosterior ) and isinstance ( proposal . posterior_estimator . _distribution , mdn ) and isinstance ( self . _neural_net . _distribution , mdn ) and check_dist_class ( self . _prior , class_to_check = ( Uniform , MultivariateNormal ) )[ 0 ] ) algorithm = \"non-atomic\" if self . use_non_atomic_loss else \"atomic\" print ( f \"Using SNPE-C with { algorithm } loss\" ) if self . use_non_atomic_loss : # Take care of z-scoring, pre-compute and store prior terms. self . _set_state_for_mog_proposal () return super () . train ( ** kwargs ) sbi.inference.snle.snle_a.SNLE_A ( LikelihoodEstimator ) \u00b6 __init__ ( self , prior = None , density_estimator = 'maf' , device = 'cpu' , logging_level = 'WARNING' , summary_writer = None , show_progress_bars = True ) special \u00b6 Sequential Neural Likelihood [1]. [1] Sequential Neural Likelihood: Fast Likelihood-free Inference with Autoregressive Flows_, Papamakarios et al., AISTATS 2019, https://arxiv.org/abs/1805.07226 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None density_estimator Union[str, Callable] If it is a string, use a pre-configured network of the provided type (one of nsf, maf, mdn, made). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the density estimator. The density estimator needs to provide the methods .log_prob and .sample() . 'maf' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'WARNING' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/snle/snle_a.py def __init__ ( self , prior : Optional [ Distribution ] = None , density_estimator : Union [ str , Callable ] = \"maf\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"WARNING\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"Sequential Neural Likelihood [1]. [1] Sequential Neural Likelihood: Fast Likelihood-free Inference with Autoregressive Flows_, Papamakarios et al., AISTATS 2019, https://arxiv.org/abs/1805.07226 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. density_estimator: If it is a string, use a pre-configured network of the provided type (one of nsf, maf, mdn, made). Alternatively, a function that builds a custom neural network can be provided. 
The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the density estimator. The density estimator needs to provide the methods `.log_prob` and `.sample()`. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNLE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNLE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description LikelihoodEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snle/snle_a.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"LikelihoodEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNLE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNLE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. 
If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNLE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNLE trains a neural network to approximate the likelihood \\(p(x|\\theta)\\) . The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snle/snle_a.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNLE trains a neural network to approximate the likelihood $p(x|\\theta)$. 
The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNLE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : likelihood_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : likelihood_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = likelihood_estimator_based_potential ( likelihood_estimator = likelihood_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . _posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. 
False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snle/snle_a.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. 
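For comparison with the SNPE variants, a minimal SNLE-A sketch, again assuming an illustrative `prior`, `simulator`, and observation `x_o`. Since SNLE only yields an unnormalized posterior, `build_posterior()` defaults to MCMC sampling:

```python
import torch
from sbi.inference import SNLE_A
from sbi.utils import BoxUniform

# Toy setup (assumptions), mirroring the SNPE-C sketch above.
prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))

def simulator(theta):
    return theta + 0.1 * torch.randn_like(theta)

x_o = torch.zeros(1, 2)

inference = SNLE_A(prior=prior, density_estimator="maf")

theta = prior.sample((1000,))
x = simulator(theta)
likelihood_estimator = inference.append_simulations(theta, x).train()

# `mcmc_parameters` are forwarded to `MCMCPosterior`; `thin` is one of its kwargs.
posterior = inference.build_posterior(
    sample_with="mcmc",
    mcmc_method="slice_np",
    mcmc_parameters={"thin": 5},
)
samples = posterior.sample((1000,), x=x_o)
```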
Source code in sbi/inference/snle/snle_a.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) inherited \u00b6 Train the density estimator to learn the distribution \\(p(x|\\theta)\\) . Parameters: Name Type Description Default resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Flow Density estimator that has learned the distribution \\(p(x|\\theta)\\) . Source code in sbi/inference/snle/snle_a.py def train ( self , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> flows . Flow : r \"\"\"Train the density estimator to learn the distribution $p(x|\\theta)$. Args: resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss after the training. 
dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Density estimator that has learned the distribution $p(x|\\theta)$. \"\"\" # Load data from most recent round. self . _round = max ( self . _data_round_index ) # Starting index for the training set (1 = discard round-0 samples). start_idx = int ( discard_prior_samples and self . _round > 0 ) train_loader , val_loader = self . get_dataloaders ( start_idx , training_batch_size , validation_fraction , resume_training , dataloader_kwargs = dataloader_kwargs , ) # First round or if retraining from scratch: # Call the `self._build_neural_net` with the rounds' thetas and xs as # arguments, which will build the neural network # This is passed into NeuralPosterior, to create a neural posterior which # can `sample()` and `log_prob()`. The network is accessible via `.net`. if self . _neural_net is None or retrain_from_scratch : # Get theta,x to initialize NN theta , x , _ = self . get_simulations ( starting_round = start_idx ) # Use only training data for building the neural net (z-scoring transforms) self . _neural_net = self . _build_neural_net ( theta [ self . train_indices ] . to ( \"cpu\" ), x [ self . train_indices ] . to ( \"cpu\" ), ) self . _x_shape = x_shape_from_simulation ( x . to ( \"cpu\" )) del theta , x assert ( len ( self . _x_shape ) < 3 ), \"SNLE cannot handle multi-dimensional simulator output.\" self . _neural_net . to ( self . _device ) if not resume_training : self . optimizer = optim . Adam ( list ( self . _neural_net . parameters ()), lr = learning_rate , ) self . epoch , self . _val_log_prob = 0 , float ( \"-Inf\" ) while self . epoch <= max_num_epochs and not self . _converged ( self . epoch , stop_after_epochs ): # Train for a single epoch. self . _neural_net . train () train_log_probs_sum = 0 for batch in train_loader : self . optimizer . zero_grad () theta_batch , x_batch = ( batch [ 0 ] . to ( self . _device ), batch [ 1 ] . to ( self . _device ), ) # Evaluate on x with theta as context. train_losses = self . _loss ( theta = theta_batch , x = x_batch ) train_loss = torch . mean ( train_losses ) train_log_probs_sum -= train_losses . sum () . item () train_loss . backward () if clip_max_norm is not None : clip_grad_norm_ ( self . _neural_net . parameters (), max_norm = clip_max_norm , ) self . optimizer . step () self . epoch += 1 train_log_prob_average = train_log_probs_sum / ( len ( train_loader ) * train_loader . batch_size # type: ignore ) self . _summary [ \"training_log_probs\" ] . append ( train_log_prob_average ) # Calculate validation performance. self . _neural_net . eval () val_log_prob_sum = 0 with torch . no_grad (): for batch in val_loader : theta_batch , x_batch = ( batch [ 0 ] . to ( self . _device ), batch [ 1 ] . to ( self . _device ), ) # Evaluate on x with theta as context. val_losses = self . _loss ( theta = theta_batch , x = x_batch ) val_log_prob_sum -= val_losses . sum () . item () # Take mean over all validation samples. self . _val_log_prob = val_log_prob_sum / ( len ( val_loader ) * val_loader . batch_size # type: ignore ) # Log validation log prob for every epoch. self . _summary [ \"validation_log_probs\" ] . append ( self . _val_log_prob ) self . _maybe_show_progress ( self . _show_progress_bars , self . epoch ) self . _report_convergence_at_end ( self . epoch , stop_after_epochs , max_num_epochs ) # Update summary. self . _summary [ \"epochs_trained\" ] . append ( self . epoch ) self . 
_summary [ \"best_validation_log_prob\" ] . append ( self . _best_val_log_prob ) # Update TensorBoard and summary dict. self . _summarize ( round_ = self . _round ) # Update description for progress bar. if show_train_summary : print ( self . _describe_round ( self . _round , self . _summary )) # Avoid keeping the gradients in the resulting network, which can # cause memory leakage when benchmarking. self . _neural_net . zero_grad ( set_to_none = True ) return deepcopy ( self . _neural_net ) sbi.inference.snre.snre_a.SNRE_A ( RatioEstimator ) \u00b6 __init__ ( self , prior = None , classifier = 'resnet' , device = 'cpu' , logging_level = 'warning' , summary_writer = None , show_progress_bars = True ) special \u00b6 AALR[1], here known as SNRE_A. [1] Likelihood-free MCMC with Amortized Approximate Likelihood Ratios , Hermans et al., ICML 2020, https://arxiv.org/abs/1903.04057 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None classifier Union[str, Callable] Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the classifier. 'resnet' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'warning' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/snre/snre_a.py def __init__ ( self , prior : Optional [ Distribution ] = None , classifier : Union [ str , Callable ] = \"resnet\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"warning\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"AALR[1], here known as SNRE_A. [1] _Likelihood-free MCMC with Amortized Approximate Likelihood Ratios_, Hermans et al., ICML 2020, https://arxiv.org/abs/1903.04057 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. classifier: Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the classifier. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 
summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNRE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNRE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description RatioEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snre/snre_a.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"RatioEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNRE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNRE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). 
\"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNRE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the normalized posterior, but sampling still requires MCMC (or rejection sampling). Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note that some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snre/snre_a.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. 
The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the **normalized** posterior, but sampling still requires MCMC (or rejection sampling). Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note that some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNRE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : ratio_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : ratio_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = ratio_estimator_based_potential ( ratio_estimator = ratio_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . _posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 
50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snre/snre_a.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. 
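Example (illustrative sketch): the `sample_with` options of `build_posterior` documented above can be exercised as below. The toy data and the "observation" `x_o` are assumptions for illustration; the calls follow the documented signatures.

```python
import torch
from sbi.utils import BoxUniform
from sbi.inference import SNRE_A

# Toy setup (illustrative assumption only).
prior = BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))
theta = prior.sample((1000,))
x = theta + 0.1 * torch.randn_like(theta)
x_o = x[0]  # hypothetical "observation"

inference = SNRE_A(prior=prior)
inference.append_simulations(theta, x).train()

# MCMC sampling with the numpy slice sampler (the documented default).
mcmc_posterior = inference.build_posterior(sample_with="mcmc", mcmc_method="slice_np")
mcmc_samples = mcmc_posterior.sample((1000,), x=x_o)

# Rejection sampling uses the prior as the proposal distribution.
rejection_posterior = inference.build_posterior(sample_with="rejection")
rejection_samples = rejection_posterior.sample((1000,), x=x_o)

# Variational inference: the returned VIPosterior is fitted for a given x
# before sampling.
vi_posterior = inference.build_posterior(sample_with="vi", vi_method="rKL")
vi_posterior.set_default_x(x_o)
vi_posterior.train()
vi_samples = vi_posterior.sample((1000,))
```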
Source code in sbi/inference/snre/snre_a.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None , loss_kwargs = {}) \u00b6 Return classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Parameters: Name Type Description Default training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None loss_kwargs Dict[str, Any] Additional or updated kwargs to be passed to the self._loss fn. {} Returns: Type Description Module Classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Source code in sbi/inference/snre/snre_a.py def train ( self , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , loss_kwargs : Dict [ str , Any ] = {}, ) -> nn . 
Module : r \"\"\"Return classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. Args: training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) loss_kwargs: Additional or updated kwargs to be passed to the self._loss fn. Returns: Classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. \"\"\" # AALR is defined for `num_atoms=2`. # Proxy to `super().__call__` to ensure right parameter. kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) return super () . train ( ** kwargs , num_atoms = 2 ) sbi.inference.snre.snre_b.SNRE_B ( RatioEstimator ) \u00b6 __init__ ( self , prior = None , classifier = 'resnet' , device = 'cpu' , logging_level = 'warning' , summary_writer = None , show_progress_bars = True ) special \u00b6 SRE[1], here known as SNRE_B. [1] On Contrastive Learning for Likelihood-free Inference , Durkan et al., ICML 2020, https://arxiv.org/pdf/2002.03712 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None classifier Union[str, Callable] Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the classifier. 'resnet' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'warning' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. 
True Source code in sbi/inference/snre/snre_b.py def __init__ ( self , prior : Optional [ Distribution ] = None , classifier : Union [ str , Callable ] = \"resnet\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"warning\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"SRE[1], here known as SNRE_B. [1] _On Contrastive Learning for Likelihood-free Inference_, Durkan et al., ICML 2020, https://arxiv.org/pdf/2002.03712 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. classifier: Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the classifier. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNRE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNRE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description RatioEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snre/snre_b.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"RatioEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). 
Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNRE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNRE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNRE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the normalized posterior, but sampling still requires MCMC (or rejection sampling). Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note that some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . 
{} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snre/snre_b.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the **normalized** posterior, but sampling still requires MCMC (or rejection sampling). Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note that some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNRE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : ratio_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : ratio_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = ratio_estimator_based_potential ( ratio_estimator = ratio_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . 
_posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snre/snre_b.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . 
tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. Source code in sbi/inference/snre/snre_b.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , num_atoms = 10 , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) \u00b6 Return classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Parameters: Name Type Description Default num_atoms int Number of atoms to use for classification. 10 training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. 
False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Module Classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Source code in sbi/inference/snre/snre_b.py def train ( self , num_atoms : int = 10 , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> nn . Module : r \"\"\"Return classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. Args: num_atoms: Number of atoms to use for classification. training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) return super () . train ( ** kwargs ) sbi.inference.snre.snre_c.SNRE_C ( RatioEstimator ) \u00b6 __init__ ( self , prior = None , classifier = 'resnet' , device = 'cpu' , logging_level = 'warning' , summary_writer = None , show_progress_bars = True ) special \u00b6 NRE-C[1] is a generalization of the non-sequential (amortized) versions of SNRE_A and SNRE_B. We call the algorithm SNRE_C within sbi . NRE-C: (1) like SNRE_B, features a \u201cmulticlass\u201d loss function where several marginally drawn parameter-data pairs are contrasted against a jointly drawn pair. (2) like AALR/NRE_A, i.e., the non-sequential version of SNRE_A, it encourages the approximate ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) , accessed through .potential() within sbi , to be exact at optimum. 
This addresses the issue that SNRE_B estimates this ratio only up to an arbitrary function (normalizing constant) of the data \\(x\\) . Just like for all ratio estimation algorithms, the sequential version of SNRE_C will be estimated only up to a function (normalizing constant) of the data \\(x\\) in rounds after the first. [1] Contrastive Neural Ratio Estimation , Benajmin Kurt Miller, et. al., NeurIPS 2022, https://arxiv.org/abs/2210.06170 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None classifier Union[str, Callable] Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the classifier. 'resnet' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'warning' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/snre/snre_c.py def __init__ ( self , prior : Optional [ Distribution ] = None , classifier : Union [ str , Callable ] = \"resnet\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"warning\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"NRE-C[1] is a generalization of the non-sequential (amortized) versions of SNRE_A and SNRE_B. We call the algorithm SNRE_C within `sbi`. NRE-C: (1) like SNRE_B, features a \"multiclass\" loss function where several marginally drawn parameter-data pairs are contrasted against a jointly drawn pair. (2) like AALR/NRE_A, i.e., the non-sequential version of SNRE_A, it encourages the approximate ratio $p(\\theta,x)/p(\\theta)p(x)$, accessed through `.potential()` within `sbi`, to be exact at optimum. This addresses the issue that SNRE_B estimates this ratio only up to an arbitrary function (normalizing constant) of the data $x$. Just like for all ratio estimation algorithms, the sequential version of SNRE_C will be estimated only up to a function (normalizing constant) of the data $x$ in rounds after the first. [1] _Contrastive Neural Ratio Estimation_, Benajmin Kurt Miller, et. al., NeurIPS 2022, https://arxiv.org/abs/2210.06170 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. classifier: Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. 
The function will be called with the first batch of simulations (theta, x), which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch `nn.Module` implementing the classifier. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNRE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNRE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description RatioEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snre/snre_c.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"RatioEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNRE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNRE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. 
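Example (illustrative sketch): when GPU memory is tight, the `data_device` argument described here lets a large training set live in system memory while the network trains on the GPU. The snippet assumes a CUDA device is available and uses a hypothetical toy simulator.

```python
import torch
from sbi.utils import BoxUniform
from sbi.inference import SNRE_C

prior = BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))
theta = prior.sample((100_000,))
x = theta + 0.1 * torch.randn_like(theta)  # hypothetical large simulation set

# Train on the GPU, but keep the dataset on the CPU.
inference = SNRE_C(prior=prior, device="cuda")
_ = inference.append_simulations(theta, x, data_device="cpu")
```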
If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). \"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNRE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the normalized posterior, but sampling still requires MCMC (or rejection sampling). Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note that some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snre/snre_c.py def build_posterior ( self , density_estimator : Optional [ nn . 
Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the **normalized** posterior, but sampling still requires MCMC (or rejection sampling). Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note that some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNRE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : ratio_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : ratio_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = ratio_estimator_based_potential ( ratio_estimator = ratio_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . _posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . 
_posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snre/snre_c.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . 
DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. Source code in sbi/inference/snre/snre_c.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , num_classes = 5 , gamma = 1.0 , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) \u00b6 Return classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Parameters: Name Type Description Default num_classes int Number of theta to classify against, corresponds to \\(K\\) in Contrastive Neural Ratio Estimation . Minimum value is 1. Similar to num_atoms for SNRE_B except SNRE_C has an additional independently drawn sample. The total number of alternative parameters NRE-C \u201csees\u201d is \\(2K-1\\) or 2 * num_classes - 1 divided between two loss terms. 5 gamma float Determines the relative weight of the sum of all \\(K\\) dependently drawn classes against the marginally drawn one. Specifically, \\(p(y=k) :=p_K\\) , \\(p(y=0) := p_0\\) , \\(p_0 = 1 - K p_K\\) , and finally \\(\\gamma := K p_K / p_0\\) . 1.0 training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 exclude_invalid_x Whether to exclude simulation outputs x=NaN or x=\u00b1\u221e during training. Expect errors, silent or explicit, when False . required resume_training bool Can be used in case training time is limited, e.g. on a cluster. 
If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Module Classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Source code in sbi/inference/snre/snre_c.py def train ( self , num_classes : int = 5 , gamma : float = 1.0 , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> nn . Module : r \"\"\"Return classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. Args: num_classes: Number of theta to classify against, corresponds to $K$ in _Contrastive Neural Ratio Estimation_. Minimum value is 1. Similar to `num_atoms` for SNRE_B except SNRE_C has an additional independently drawn sample. The total number of alternative parameters `NRE-C` \"sees\" is $2K-1$ or `2 * num_classes - 1` divided between two loss terms. gamma: Determines the relative weight of the sum of all $K$ dependently drawn classes against the marginally drawn one. Specifically, $p(y=k) :=p_K$, $p(y=0) := p_0$, $p_0 = 1 - K p_K$, and finally $\\gamma := K p_K / p_0$. training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. exclude_invalid_x: Whether to exclude simulation outputs `x=NaN` or `x=\u00b1\u221e` during training. Expect errors, silent or explicit, when `False`. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. 
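Example (illustrative sketch): the `num_classes` and `gamma` arguments of `SNRE_C.train` can be passed as below. The toy data are assumptions for illustration; the call follows the documented signature.

```python
import torch
from sbi.utils import BoxUniform
from sbi.inference import SNRE_C

prior = BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))
theta = prior.sample((1000,))
x = theta + 0.1 * torch.randn_like(theta)  # hypothetical toy simulator

inference = SNRE_C(prior=prior)
# With num_classes=5 the classifier "sees" 2 * 5 - 1 = 9 alternative parameter
# sets per example; gamma weights the K dependently drawn classes against the
# single marginally drawn one.
_ = inference.append_simulations(theta, x).train(num_classes=5, gamma=1.0)
posterior = inference.build_posterior(sample_with="mcmc")
```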
dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) kwargs [ \"num_atoms\" ] = kwargs . pop ( \"num_classes\" ) + 1 kwargs [ \"loss_kwargs\" ] = { \"gamma\" : kwargs . pop ( \"gamma\" )} return super () . train ( ** kwargs ) sbi.inference.snre.bnre.BNRE ( SNRE_A ) \u00b6 __init__ ( self , prior = None , classifier = 'resnet' , device = 'cpu' , logging_level = 'warning' , summary_writer = None , show_progress_bars = True ) special \u00b6 Balanced neural ratio estimation (BNRE)[1]. BNRE is a variation of NRE aiming to produce more conservative posterior approximations [1] Delaunoy, A., Hermans, J., Rozet, F., Wehenkel, A., & Louppe, G.. Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation. NeurIPS 2022. https://arxiv.org/abs/2208.13624 Parameters: Name Type Description Default prior Optional[torch.distributions.distribution.Distribution] A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If None , the prior must be passed to .build_posterior() . None classifier Union[str, Callable] Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations \\((\\theta, x)\\) , which can thus be used for shape inference and potentially for z-scoring. It needs to return a PyTorch nn.Module implementing the classifier. 'resnet' device str Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:{0, 1, \u2026}\u201d. 'cpu' logging_level Union[int, str] Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. 'warning' summary_writer Optional[Writer] A tensorboard SummaryWriter to control, among others, log file location (default is /logs .) None show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/snre/bnre.py def __init__ ( self , prior : Optional [ Distribution ] = None , classifier : Union [ str , Callable ] = \"resnet\" , device : str = \"cpu\" , logging_level : Union [ int , str ] = \"warning\" , summary_writer : Optional [ TensorboardSummaryWriter ] = None , show_progress_bars : bool = True , ): r \"\"\"Balanced neural ratio estimation (BNRE)[1]. BNRE is a variation of NRE aiming to produce more conservative posterior approximations [1] Delaunoy, A., Hermans, J., Rozet, F., Wehenkel, A., & Louppe, G.. Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation. NeurIPS 2022. https://arxiv.org/abs/2208.13624 Args: prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. If `None`, the prior must be passed to `.build_posterior()`. classifier: Classifier trained to approximate likelihood ratios. If it is a string, use a pre-configured network of the provided type (one of linear, mlp, resnet). Alternatively, a function that builds a custom neural network can be provided. The function will be called with the first batch of simulations $(\\theta, x)$, which can thus be used for shape inference and potentially for z-scoring. 
It needs to return a PyTorch `nn.Module` implementing the classifier. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:{0, 1, ...}\". logging_level: Minimum severity of messages to log. One of the strings INFO, WARNING, DEBUG, ERROR and CRITICAL. summary_writer: A tensorboard `SummaryWriter` to control, among others, log file location (default is `/logs`.) show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) super () . __init__ ( ** kwargs ) append_simulations ( self , theta , x , exclude_invalid_x = False , from_round = 0 , data_device = None ) inherited \u00b6 Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores \\(\\theta\\) , \\(x\\) , prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Parameters: Name Type Description Default theta Tensor Parameter sets. required x Tensor Simulation outputs. required exclude_invalid_x bool Whether invalid simulations are discarded during training. If False , SNRE raises an error when invalid simulations are found. If True , invalid simulations are discarded and training can proceed, but this gives systematically wrong results. False from_round int Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for SNRE . Only when the user later on requests .train(discard_prior_samples=True) , we use these indices to find which training data stemmed from the prior. 0 data_device Optional[str] Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to \u2018cpu\u2019 to store data on system memory instead. None Returns: Type Description RatioEstimator NeuralInference object (returned so that this function is chainable). Source code in sbi/inference/snre/bnre.py def append_simulations ( self , theta : Tensor , x : Tensor , exclude_invalid_x : bool = False , from_round : int = 0 , data_device : Optional [ str ] = None , ) -> \"RatioEstimator\" : r \"\"\"Store parameters and simulation outputs to use them for later training. Data are stored as entries in lists for each type of variable (parameter/data). Stores $\\theta$, $x$, prior_masks (indicating if simulations are coming from the prior or not) and an index indicating which round the batch of simulations came from. Args: theta: Parameter sets. x: Simulation outputs. exclude_invalid_x: Whether invalid simulations are discarded during training. If `False`, SNRE raises an error when invalid simulations are found. If `True`, invalid simulations are discarded and training can proceed, but this gives systematically wrong results. from_round: Which round the data stemmed from. Round 0 means from the prior. With default settings, this is not used at all for `SNRE`. Only when the user later on requests `.train(discard_prior_samples=True)`, we use these indices to find which training data stemmed from the prior. data_device: Where to store the data, default is on the same device where the training is happening. If training a large dataset on a GPU with not much VRAM can set to 'cpu' to store data on system memory instead. Returns: NeuralInference object (returned so that this function is chainable). 
\"\"\" is_valid_x , num_nans , num_infs = handle_invalid_x ( x , exclude_invalid_x ) x = x [ is_valid_x ] theta = theta [ is_valid_x ] # Check for problematic z-scoring warn_if_zscoring_changes_data ( x ) nle_nre_apt_msg_on_invalid_x ( num_nans , num_infs , exclude_invalid_x , \"SNRE\" ) if data_device is None : data_device = self . _device theta , x = validate_theta_and_x ( theta , x , data_device = data_device , training_device = self . _device ) prior_masks = mask_sims_from_prior ( int ( from_round ), theta . size ( 0 )) self . _theta_roundwise . append ( theta ) self . _x_roundwise . append ( x ) self . _prior_masks . append ( prior_masks ) self . _data_round_index . append ( int ( from_round )) return self build_posterior ( self , density_estimator = None , prior = None , sample_with = 'mcmc' , mcmc_method = 'slice_np' , vi_method = 'rKL' , mcmc_parameters = {}, vi_parameters = {}, rejection_sampling_parameters = {}) inherited \u00b6 Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability \\(p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)\\) and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the normalized posterior, but sampling still requires MCMC (or rejection sampling). Parameters: Name Type Description Default density_estimator Optional[torch.nn.modules.module.Module] The density estimator that the posterior is based on. If None , use the latest neural density estimator that was trained. None prior Optional[torch.distributions.distribution.Distribution] Prior distribution. None sample_with str Method to use for sampling from the posterior. Must be one of [ mcmc | rejection | vi ]. 'mcmc' mcmc_method str Method used for MCMC sampling, one of slice_np , slice , hmc , nuts . Currently defaults to slice_np for a custom numpy implementation of slice sampling; select hmc , nuts or slice for Pyro-based sampling. 'slice_np' vi_method str Method used for VI, one of [ rKL , fKL , IW , alpha ]. Note that some of the methods admit a mode seeking property (e.g. rKL) whereas some admit a mass covering one (e.g fKL). 'rKL' mcmc_parameters Dict[str, Any] Additional kwargs passed to MCMCPosterior . {} vi_parameters Dict[str, Any] Additional kwargs passed to VIPosterior . {} rejection_sampling_parameters Dict[str, Any] Additional kwargs passed to RejectionPosterior . {} Returns: Type Description Union[sbi.inference.posteriors.mcmc_posterior.MCMCPosterior, sbi.inference.posteriors.rejection_posterior.RejectionPosterior, sbi.inference.posteriors.vi_posterior.VIPosterior] Posterior \\(p(\\theta|x)\\) with .sample() and .log_prob() methods (the returned log-probability is unnormalized). Source code in sbi/inference/snre/bnre.py def build_posterior ( self , density_estimator : Optional [ nn . Module ] = None , prior : Optional [ Distribution ] = None , sample_with : str = \"mcmc\" , mcmc_method : str = \"slice_np\" , vi_method : str = \"rKL\" , mcmc_parameters : Dict [ str , Any ] = {}, vi_parameters : Dict [ str , Any ] = {}, rejection_sampling_parameters : Dict [ str , Any ] = {}, ) -> Union [ MCMCPosterior , RejectionPosterior , VIPosterior ]: r \"\"\"Build posterior from the neural density estimator. SNRE trains a neural network to approximate likelihood ratios. 
The posterior wraps the trained network such that one can directly evaluate the unnormalized posterior log probability $p(\\theta|x) \\propto p(x|\\theta) \\cdot p(\\theta)$ and draw samples from the posterior with MCMC or rejection sampling. Note that, in the case of single-round SNRE_A / AALR, it is possible to evaluate the log-probability of the **normalized** posterior, but sampling still requires MCMC (or rejection sampling). Args: density_estimator: The density estimator that the posterior is based on. If `None`, use the latest neural density estimator that was trained. prior: Prior distribution. sample_with: Method to use for sampling from the posterior. Must be one of [`mcmc` | `rejection` | `vi`]. mcmc_method: Method used for MCMC sampling, one of `slice_np`, `slice`, `hmc`, `nuts`. Currently defaults to `slice_np` for a custom numpy implementation of slice sampling; select `hmc`, `nuts` or `slice` for Pyro-based sampling. vi_method: Method used for VI, one of [`rKL`, `fKL`, `IW`, `alpha`]. Note that some of the methods admit a `mode seeking` property (e.g. rKL) whereas some admit a `mass covering` one (e.g fKL). mcmc_parameters: Additional kwargs passed to `MCMCPosterior`. vi_parameters: Additional kwargs passed to `VIPosterior`. rejection_sampling_parameters: Additional kwargs passed to `RejectionPosterior`. Returns: Posterior $p(\\theta|x)$ with `.sample()` and `.log_prob()` methods (the returned log-probability is unnormalized). \"\"\" if prior is None : assert ( self . _prior is not None ), \"\"\"You did not pass a prior. You have to pass the prior either at initialization `inference = SNRE(prior)` or to `.build_posterior (prior=prior)`.\"\"\" prior = self . _prior else : check_prior ( prior ) if density_estimator is None : ratio_estimator = self . _neural_net # If internal net is used device is defined. device = self . _device else : ratio_estimator = density_estimator # Otherwise, infer it from the device of the net parameters. device = next ( density_estimator . parameters ()) . device . type potential_fn , theta_transform = ratio_estimator_based_potential ( ratio_estimator = ratio_estimator , prior = prior , x_o = None , ) if sample_with == \"mcmc\" : self . _posterior = MCMCPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , proposal = prior , method = mcmc_method , device = device , x_shape = self . _x_shape , ** mcmc_parameters , ) elif sample_with == \"rejection\" : self . _posterior = RejectionPosterior ( potential_fn = potential_fn , proposal = prior , device = device , x_shape = self . _x_shape , ** rejection_sampling_parameters , ) elif sample_with == \"vi\" : self . _posterior = VIPosterior ( potential_fn = potential_fn , theta_transform = theta_transform , prior = prior , # type: ignore vi_method = vi_method , device = device , x_shape = self . _x_shape , ** vi_parameters , ) else : raise NotImplementedError # Store models at end of each round. self . _model_bank . append ( deepcopy ( self . _posterior )) return deepcopy ( self . _posterior ) get_dataloaders ( self , starting_round = 0 , training_batch_size = 50 , validation_fraction = 0.1 , resume_training = False , dataloader_kwargs = None ) inherited \u00b6 Return dataloaders for training and validation. Parameters: Name Type Description Default dataset holding all theta and x, optionally masks. required training_batch_size int training arg of inference methods. 
50 resume_training bool Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. False dataloader_kwargs Optional[dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). None Returns: Type Description Tuple[torch.utils.data.dataloader.DataLoader, torch.utils.data.dataloader.DataLoader] Tuple of dataloaders for training and validation. Source code in sbi/inference/snre/bnre.py def get_dataloaders ( self , starting_round : int = 0 , training_batch_size : int = 50 , validation_fraction : float = 0.1 , resume_training : bool = False , dataloader_kwargs : Optional [ dict ] = None , ) -> Tuple [ data . DataLoader , data . DataLoader ]: \"\"\"Return dataloaders for training and validation. Args: dataset: holding all theta and x, optionally masks. training_batch_size: training arg of inference methods. resume_training: Whether the current call is resuming training so that no new training and validation indices into the dataset have to be created. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn). Returns: Tuple of dataloaders for training and validation. \"\"\" # theta , x , prior_masks = self . get_simulations ( starting_round ) dataset = data . TensorDataset ( theta , x , prior_masks ) # Get total number of training examples. num_examples = theta . size ( 0 ) # Select random train and validation splits from (theta, x) pairs. num_training_examples = int (( 1 - validation_fraction ) * num_examples ) num_validation_examples = num_examples - num_training_examples if not resume_training : # Seperate indicies for training and validation permuted_indices = torch . randperm ( num_examples ) self . train_indices , self . val_indices = ( permuted_indices [: num_training_examples ], permuted_indices [ num_training_examples :], ) # Create training and validation loaders using a subset sampler. # Intentionally use dicts to define the default dataloader args # Then, use dataloader_kwargs to override (or add to) any of these defaults # https://stackoverflow.com/questions/44784577/in-method-call-args-how-to-override-keyword-argument-of-unpacked-dict train_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_training_examples ), \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . train_indices . tolist ()), } val_loader_kwargs = { \"batch_size\" : min ( training_batch_size , num_validation_examples ), \"shuffle\" : False , \"drop_last\" : True , \"sampler\" : SubsetRandomSampler ( self . val_indices . tolist ()), } if dataloader_kwargs is not None : train_loader_kwargs = dict ( train_loader_kwargs , ** dataloader_kwargs ) val_loader_kwargs = dict ( val_loader_kwargs , ** dataloader_kwargs ) train_loader = data . DataLoader ( dataset , ** train_loader_kwargs ) val_loader = data . DataLoader ( dataset , ** val_loader_kwargs ) return train_loader , val_loader get_simulations ( self , starting_round = 0 ) inherited \u00b6 Returns all \\(\\theta\\) , \\(x\\) , and prior_masks from rounds >= starting_round . If requested, do not return invalid data. Parameters: Name Type Description Default starting_round int The earliest round to return samples from (we start counting from zero). 0 warn_on_invalid Whether to give out a warning if invalid simulations were found. required Returns: Parameters, simulation outputs, prior masks. 
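Example (a minimal sketch, not part of the reference itself; the uniform prior and the Gaussian stand-in simulator below are illustrative assumptions): after appending simulations, the stored training data can be inspected via get_simulations :

    import torch
    from sbi.inference import BNRE
    from sbi.utils import BoxUniform

    prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
    theta = prior.sample((1000,))
    x = theta + 0.1 * torch.randn_like(theta)  # stand-in simulator

    inference = BNRE(prior=prior)
    inference.append_simulations(theta, x)

    # All parameters, simulations, and prior masks stored since round 0.
    theta_all, x_all, prior_masks = inference.get_simulations(starting_round=0)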
Source code in sbi/inference/snre/bnre.py def get_simulations ( self , starting_round : int = 0 , ) -> Tuple [ Tensor , Tensor , Tensor ]: r \"\"\"Returns all $\\theta$, $x$, and prior_masks from rounds >= `starting_round`. If requested, do not return invalid data. Args: starting_round: The earliest round to return samples from (we start counting from zero). warn_on_invalid: Whether to give out a warning if invalid simulations were found. Returns: Parameters, simulation outputs, prior masks. \"\"\" theta = get_simulations_since_round ( self . _theta_roundwise , self . _data_round_index , starting_round ) x = get_simulations_since_round ( self . _x_roundwise , self . _data_round_index , starting_round ) prior_masks = get_simulations_since_round ( self . _prior_masks , self . _data_round_index , starting_round ) return theta , x , prior_masks train ( self , regularization_strength = 100.0 , training_batch_size = 50 , learning_rate = 0.0005 , validation_fraction = 0.1 , stop_after_epochs = 20 , max_num_epochs = 2147483647 , clip_max_norm = 5.0 , resume_training = False , discard_prior_samples = False , retrain_from_scratch = False , show_train_summary = False , dataloader_kwargs = None ) \u00b6 Return classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . Parameters: Name Type Description Default regularization_strength float The multiplicative coefficient applied to the balancing regularizer ( \\(\\lambda\\) ). 100.0 training_batch_size int Training batch size. 50 learning_rate float Learning rate for Adam optimizer. 0.0005 validation_fraction float The fraction of data to use for validation. 0.1 stop_after_epochs int The number of epochs to wait for improvement on the validation set before terminating training. 20 max_num_epochs int Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also stop_after_epochs ). 2147483647 clip_max_norm Optional[float] Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. 5.0 exclude_invalid_x Whether to exclude simulation outputs x=NaN or x=\u00b1\u221e during training. Expect errors, silent or explicit, when False . required resume_training bool Can be used in case training time is limited, e.g. on a cluster. If True , the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time .train() was called. False discard_prior_samples bool Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. False retrain_from_scratch bool Whether to retrain the conditional density estimator for the posterior from scratch each round. False show_train_summary bool Whether to print the number of epochs and validation loss and leakage after the training. False dataloader_kwargs Optional[Dict] Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) None Returns: Type Description Module Classifier that approximates the ratio \\(p(\\theta,x)/p(\\theta)p(x)\\) . 
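Example (a minimal sketch under the same illustrative assumptions as above; the toy prior and stand-in simulator are not part of sbi ): train is typically chained with append_simulations and followed by build_posterior :

    import torch
    from sbi.inference import BNRE
    from sbi.utils import BoxUniform

    prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
    theta = prior.sample((2000,))
    x = theta + 0.1 * torch.randn_like(theta)  # stand-in simulator

    inference = BNRE(prior=prior)
    ratio_estimator = inference.append_simulations(theta, x).train(
        regularization_strength=100.0,  # lambda, the balancing regularizer weight
    )
    posterior = inference.build_posterior(sample_with='mcmc', mcmc_method='slice_np')
    samples = posterior.sample((100,), x=x[:1])

Per the BNRE paper, larger values of regularization_strength enforce the balancing condition more strictly and tend to yield more conservative posterior approximations.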
Source code in sbi/inference/snre/bnre.py def train ( self , regularization_strength : float = 100.0 , training_batch_size : int = 50 , learning_rate : float = 5e-4 , validation_fraction : float = 0.1 , stop_after_epochs : int = 20 , max_num_epochs : int = 2 ** 31 - 1 , clip_max_norm : Optional [ float ] = 5.0 , resume_training : bool = False , discard_prior_samples : bool = False , retrain_from_scratch : bool = False , show_train_summary : bool = False , dataloader_kwargs : Optional [ Dict ] = None , ) -> nn . Module : r \"\"\"Return classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. Args: regularization_strength: The multiplicative coefficient applied to the balancing regularizer ($\\lambda$). training_batch_size: Training batch size. learning_rate: Learning rate for Adam optimizer. validation_fraction: The fraction of data to use for validation. stop_after_epochs: The number of epochs to wait for improvement on the validation set before terminating training. max_num_epochs: Maximum number of epochs to run. If reached, we stop training even when the validation loss is still decreasing. Otherwise, we train until validation loss increases (see also `stop_after_epochs`). clip_max_norm: Value at which to clip the total gradient norm in order to prevent exploding gradients. Use None for no clipping. exclude_invalid_x: Whether to exclude simulation outputs `x=NaN` or `x=\u00b1\u221e` during training. Expect errors, silent or explicit, when `False`. resume_training: Can be used in case training time is limited, e.g. on a cluster. If `True`, the split between train and validation set, the optimizer, the number of epochs, and the best validation log-prob will be restored from the last time `.train()` was called. discard_prior_samples: Whether to discard samples simulated in round 1, i.e. from the prior. Training may be sped up by ignoring such less targeted samples. retrain_from_scratch: Whether to retrain the conditional density estimator for the posterior from scratch each round. show_train_summary: Whether to print the number of epochs and validation loss and leakage after the training. dataloader_kwargs: Additional or updated kwargs to be passed to the training and validation dataloaders (like, e.g., a collate_fn) Returns: Classifier that approximates the ratio $p(\\theta,x)/p(\\theta)p(x)$. \"\"\" kwargs = del_entries ( locals (), entries = ( \"self\" , \"__class__\" )) kwargs [ \"loss_kwargs\" ] = { \"regularization_strength\" : kwargs . pop ( \"regularization_strength\" ) } return super () . train ( ** kwargs ) sbi.inference.abc.mcabc.MCABC ( ABCBASE ) \u00b6 __call__ ( self , x_o , num_simulations , eps = None , quantile = None , lra = False , sass = False , sass_fraction = 0.25 , sass_expansion_degree = 1 , kde = False , kde_kwargs = {}, return_summary = False ) special \u00b6 Run MCABC and return accepted parameters or KDE object fitted on them. Parameters: Name Type Description Default x_o Union[torch.Tensor, numpy.ndarray] Observed data. required num_simulations int Number of simulations to run. required eps Optional[float] Acceptance threshold \\(\\epsilon\\) for distance between observed and simulated data. None quantile Optional[float] Upper quantile of smallest distances for which the corresponding parameters are returned, e.g, q=0.01 will return the top 1%. Exactly one of quantile or eps have to be passed. None lra bool Whether to run linear regression adjustment as in Beaumont et al. 
2002 False sass bool Whether to determine semi-automatic summary statistics as in Fearnhead & Prangle 2012. False sass_fraction float Fraction of simulation budget used for the initial sass run. 0.25 sass_expansion_degree int Degree of the polynomial feature expansion for the sass regression, default 1 - no expansion. 1 kde bool Whether to run KDE on the accepted parameters to return a KDE object from which one can sample. False kde_kwargs Dict[str, Any] kwargs for performing KDE: \u2018bandwidth=\u2019; either a float, or a string naming a bandwidth heuristics, e.g., \u2018cv\u2019 (cross validation), \u2018silvermann\u2019 or \u2018scott\u2019, default \u2018cv\u2019. \u2018transform\u2019: transform applied to the parameters before doing KDE. \u2018sample_weights\u2019: weights associated with samples. See \u2018get_kde\u2019 for more details {} return_summary bool Whether to return the distances and data corresponding to the accepted parameters. False Returns: Type Description theta (if kde False) accepted parameters kde (if kde True): KDE object based on accepted parameters from which one can .sample() and .log_prob(). summary (if summary True): dictionary containing the accepted paramters (if kde True), distances and simulated data x. Source code in sbi/inference/abc/mcabc.py def __call__ ( self , x_o : Union [ Tensor , ndarray ], num_simulations : int , eps : Optional [ float ] = None , quantile : Optional [ float ] = None , lra : bool = False , sass : bool = False , sass_fraction : float = 0.25 , sass_expansion_degree : int = 1 , kde : bool = False , kde_kwargs : Dict [ str , Any ] = {}, return_summary : bool = False , ) -> Union [ Tuple [ Tensor , dict ], Tuple [ KDEWrapper , dict ], Tensor , KDEWrapper ]: r \"\"\"Run MCABC and return accepted parameters or KDE object fitted on them. Args: x_o: Observed data. num_simulations: Number of simulations to run. eps: Acceptance threshold $\\epsilon$ for distance between observed and simulated data. quantile: Upper quantile of smallest distances for which the corresponding parameters are returned, e.g, q=0.01 will return the top 1%. Exactly one of quantile or `eps` have to be passed. lra: Whether to run linear regression adjustment as in Beaumont et al. 2002 sass: Whether to determine semi-automatic summary statistics as in Fearnhead & Prangle 2012. sass_fraction: Fraction of simulation budget used for the initial sass run. sass_expansion_degree: Degree of the polynomial feature expansion for the sass regression, default 1 - no expansion. kde: Whether to run KDE on the accepted parameters to return a KDE object from which one can sample. kde_kwargs: kwargs for performing KDE: 'bandwidth='; either a float, or a string naming a bandwidth heuristics, e.g., 'cv' (cross validation), 'silvermann' or 'scott', default 'cv'. 'transform': transform applied to the parameters before doing KDE. 'sample_weights': weights associated with samples. See 'get_kde' for more details return_summary: Whether to return the distances and data corresponding to the accepted parameters. Returns: theta (if kde False): accepted parameters kde (if kde True): KDE object based on accepted parameters from which one can .sample() and .log_prob(). summary (if summary True): dictionary containing the accepted paramters (if kde True), distances and simulated data x. \"\"\" # Exactly one of eps or quantile need to be passed. 
assert ( eps is not None ) ^ ( quantile is not None ), \"Eps or quantile must be passed, but not both.\" # Run SASS and change the simulator and x_o accordingly. if sass : num_pilot_simulations = int ( sass_fraction * num_simulations ) self . logger . info ( f \"Running SASS with { num_pilot_simulations } pilot samples.\" ) num_simulations -= num_pilot_simulations pilot_theta = self . prior . sample (( num_pilot_simulations ,)) pilot_x = self . _batched_simulator ( pilot_theta ) sass_transform = self . get_sass_transform ( pilot_theta , pilot_x , sass_expansion_degree ) simulator = lambda theta : sass_transform ( self . _batched_simulator ( theta )) x_o = sass_transform ( x_o ) else : simulator = self . _batched_simulator # Simulate and calculate distances. theta = self . prior . sample (( num_simulations ,)) x = simulator ( theta ) # Infer shape of x to test and set x_o. self . x_shape = x [ 0 ] . unsqueeze ( 0 ) . shape self . x_o = process_x ( x_o , self . x_shape ) distances = self . distance ( self . x_o , x ) # Select based on acceptance threshold epsilon. if eps is not None : is_accepted = distances < eps num_accepted = is_accepted . sum () . item () assert num_accepted > 0 , f \"No parameters accepted, eps= { eps } too small\" theta_accepted = theta [ is_accepted ] distances_accepted = distances [ is_accepted ] x_accepted = x [ is_accepted ] # Select based on quantile on sorted distances. elif quantile is not None : num_top_samples = int ( num_simulations * quantile ) sort_idx = torch . argsort ( distances ) theta_accepted = theta [ sort_idx ][: num_top_samples ] distances_accepted = distances [ sort_idx ][: num_top_samples ] x_accepted = x [ sort_idx ][: num_top_samples ] else : raise ValueError ( \"One of epsilon or quantile has to be passed.\" ) # Maybe adjust theta with LRA. if lra : self . logger . info ( \"Running Linear regression adjustment.\" ) final_theta = self . run_lra ( theta_accepted , x_accepted , observation = self . x_o ) else : final_theta = theta_accepted if kde : self . logger . info ( f \"\"\"KDE on { final_theta . shape [ 0 ] } samples with bandwidth option { kde_kwargs [ \"bandwidth\" ] if \"bandwidth\" in kde_kwargs else \"cv\" } . Beware that KDE can give unreliable results when used with too few samples and in high dimensions.\"\"\" ) kde_dist = get_kde ( final_theta , ** kde_kwargs ) if return_summary : return ( kde_dist , dict ( theta = final_theta , distances = distances_accepted , x = x_accepted ), ) else : return kde_dist elif return_summary : return final_theta , dict ( distances = distances_accepted , x = x_accepted ) else : return final_theta __init__ ( self , simulator , prior , distance = 'l2' , num_workers = 1 , simulation_batch_size = 1 , show_progress_bars = True ) special \u00b6 Monte-Carlo Approximate Bayesian Computation (Rejection ABC) [1]. [1] Pritchard, J. K., Seielstad, M. T., Perez-Lezaun, A., & Feldman, M. W. (1999). Population growth of human Y chromosomes: a study of Y chromosome microsatellites. Molecular biology and evolution, 16(12), 1791-1798. Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\mathrm{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required prior A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. 
Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. required distance Union[str, Callable] Distance function to compare observed and simulated data. Can be a custom function or one of l1 , l2 , mse . 'l2' num_workers int Number of parallel workers to use for simulations. 1 simulation_batch_size int Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 1 show_progress_bars bool Whether to show a progressbar during simulation and sampling. True Source code in sbi/inference/abc/mcabc.py def __init__ ( self , simulator : Callable , prior , distance : Union [ str , Callable ] = \"l2\" , num_workers : int = 1 , simulation_batch_size : int = 1 , show_progress_bars : bool = True , ): r \"\"\"Monte-Carlo Approximate Bayesian Computation (Rejection ABC) [1]. [1] Pritchard, J. K., Seielstad, M. T., Perez-Lezaun, A., & Feldman, M. W. (1999). Population growth of human Y chromosomes: a study of Y chromosome microsatellites. Molecular biology and evolution, 16(12), 1791-1798. Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\mathrm{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()` and `.sample()` (for example, a PyTorch distribution) can be used. distance: Distance function to compare observed and simulated data. Can be a custom function or one of `l1`, `l2`, `mse`. num_workers: Number of parallel workers to use for simulations. simulation_batch_size: Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). show_progress_bars: Whether to show a progressbar during simulation and sampling. \"\"\" super () . __init__ ( simulator = simulator , prior = prior , distance = distance , num_workers = num_workers , simulation_batch_size = simulation_batch_size , show_progress_bars = show_progress_bars , ) get_distance_function ( distance_type = 'l2' ) inherited \u00b6 Return distance function for given distance type. Parameters: Name Type Description Default distance_type Union[str, Callable] string indicating the distance type, e.g., \u2018l2\u2019, \u2018l1\u2019, \u2018mse\u2019. Note that the returned distance function averages over the last dimension, e.g., over the summary statistics. 'l2' Returns: Type Description distance_fun distance function built from the passed string; if distance_type is already a callable, it is returned unchanged. Source code in sbi/inference/abc/mcabc.py @staticmethod def get_distance_function ( distance_type : Union [ str , Callable ] = \"l2\" ) -> Callable : \"\"\"Return distance function for given distance type. Args: distance_type: string indicating the distance type, e.g., 'l2', 'l1', 'mse'. Note that the returned distance function averages over the last dimension, e.g., over the summary statistics. Returns: distance_fun: distance function built from the passed string; if distance_type is already a callable, it is returned unchanged.
\"\"\" if isinstance ( distance_type , Callable ): return distance_type distances = [ \"l1\" , \"l2\" , \"mse\" ] assert ( distance_type in distances ), f \" { distance_type } must be one of { distances } .\" if distance_type == \"mse\" : distance = lambda xo , x : torch . mean (( xo - x ) ** 2 , dim =- 1 ) elif distance_type == \"l2\" : distance = lambda xo , x : torch . norm (( xo - x ), dim =- 1 ) elif distance_type == \"l1\" : distance = lambda xo , x : torch . mean ( abs ( xo - x ), dim =- 1 ) else : raise ValueError ( r \"Distance {distance_type} not supported.\" ) def distance_fun ( observed_data : Tensor , simulated_data : Tensor ) -> Tensor : \"\"\"Return distance over batch dimension. Args: observed_data: Observed data, could be 1D. simulated_data: Batch of simulated data, has batch dimension. Returns: Torch tensor with batch of distances. \"\"\" assert simulated_data . ndim == 2 , \"simulated data needs batch dimension\" return distance ( observed_data , simulated_data ) return distance_fun get_sass_transform ( theta , x , expansion_degree = 1 , sample_weight = None ) inherited \u00b6 Return semi-automatic summary statitics function. Running weighted linear regressin as in Fearnhead & Prandle 2012: https://arxiv.org/abs/1004.1112 Following implementation in https://abcpy.readthedocs.io/en/latest/_modules/abcpy/statistics.html#Identity and https://pythonhosted.org/abcpy/_modules/abcpy/summaryselections.html#Semiautomatic Source code in sbi/inference/abc/mcabc.py @staticmethod def get_sass_transform ( theta : torch . Tensor , x : torch . Tensor , expansion_degree : int = 1 , sample_weight = None , ) -> Callable : \"\"\"Return semi-automatic summary statitics function. Running weighted linear regressin as in Fearnhead & Prandle 2012: https://arxiv.org/abs/1004.1112 Following implementation in https://abcpy.readthedocs.io/en/latest/_modules/abcpy/statistics.html#Identity and https://pythonhosted.org/abcpy/_modules/abcpy/summaryselections.html#Semiautomatic \"\"\" expansion = PolynomialFeatures ( degree = expansion_degree , include_bias = False ) # Transform x, remove intercept. x_expanded = expansion . fit_transform ( x ) sumstats_map = np . zeros (( x_expanded . shape [ 1 ], theta . shape [ 1 ])) for parameter_idx in range ( theta . shape [ 1 ]): regression_model = LinearRegression ( fit_intercept = True ) regression_model . fit ( X = x_expanded , y = theta [:, parameter_idx ], sample_weight = sample_weight ) sumstats_map [:, parameter_idx ] = regression_model . coef_ sumstats_map = torch . tensor ( sumstats_map , dtype = torch . float32 ) def sumstats_transform ( x ): x_expanded = torch . tensor ( expansion . fit_transform ( x ), dtype = torch . float32 ) return x_expanded . mm ( sumstats_map ) return sumstats_transform run_lra ( theta , x , observation , sample_weight = None ) inherited \u00b6 Return parameters adjusted with linear regression adjustment. Implementation as in Beaumont et al. 2002: https://arxiv.org/abs/1707.01254 Source code in sbi/inference/abc/mcabc.py @staticmethod def run_lra ( theta : torch . Tensor , x : torch . Tensor , observation : torch . Tensor , sample_weight = None , ) -> torch . Tensor : \"\"\"Return parameters adjusted with linear regression adjustment. Implementation as in Beaumont et al. 2002: https://arxiv.org/abs/1707.01254 \"\"\" theta_adjusted = theta for parameter_idx in range ( theta . shape [ 1 ]): regression_model = LinearRegression ( fit_intercept = True ) regression_model . 
fit ( X = x , y = theta [:, parameter_idx ], sample_weight = sample_weight , ) theta_adjusted [:, parameter_idx ] += regression_model . predict ( observation . reshape ( 1 , - 1 ) ) theta_adjusted [:, parameter_idx ] -= regression_model . predict ( x ) return theta_adjusted sbi.inference.abc.smcabc.SMCABC ( ABCBASE ) \u00b6 __call__ ( self , x_o , num_particles , num_initial_pop , num_simulations , epsilon_decay , distance_based_decay = False , ess_min = None , kernel_variance_scale = 1.0 , use_last_pop_samples = True , return_summary = False , kde = False , kde_kwargs = {}, kde_sample_weights = False , lra = False , lra_with_weights = False , sass = False , sass_fraction = 0.25 , sass_expansion_degree = 1 ) special \u00b6 Run SMCABC and return accepted parameters or KDE object fitted on them. Parameters: Name Type Description Default x_o Union[torch.Tensor, numpy.ndarray] Observed data. required num_particles int Number of particles in each population. required num_initial_pop int Number of simulations used for initial population. required num_simulations int Total number of possible simulations. required epsilon_decay float Factor with which the acceptance threshold \\(\\epsilon\\) decays. required distance_based_decay bool Whether the \\(\\epsilon\\) decay is constant over populations or calculated from the previous populations distribution of distances. False ess_min Optional[float] Threshold of effective sampling size for resampling weights. Not used when None (default). None kernel_variance_scale float Factor for scaling the perturbation kernel variance. 1.0 use_last_pop_samples bool Whether to fill up the current population with samples from the previous population when the budget is used up. If False, the current population is discarded and the previous population is returned. True lra bool Whether to run linear regression adjustment as in Beaumont et al. 2002 False lra_with_weights bool Whether to run lra as weighted linear regression with SMC weights False sass bool Whether to determine semi-automatic summary statistics as in Fearnhead & Prangle 2012. False sass_fraction float Fraction of simulation budget used for the initial sass run. 0.25 sass_expansion_degree int Degree of the polynomial feature expansion for the sass regression, default 1 - no expansion. 1 kde bool Whether to run KDE on the accepted parameters to return a KDE object from which one can sample. False kde_kwargs Dict[str, Any] kwargs for performing KDE: \u2018bandwidth=\u2019; either a float, or a string naming a bandwidth heuristics, e.g., \u2018cv\u2019 (cross validation), \u2018silvermann\u2019 or \u2018scott\u2019, default \u2018cv\u2019. \u2018transform\u2019: transform applied to the parameters before doing KDE. \u2018sample_weights\u2019: weights associated with samples. See \u2018get_kde\u2019 for more details {} kde_sample_weights bool Whether perform weighted KDE with SMC weights or on raw particles. False return_summary bool Whether to return a dictionary with all accepted particles, weights, etc. at the end. False Returns: Type Description theta (if kde False) accepted parameters of the last population. kde (if kde True): KDE object fitted on accepted parameters, from which one can .sample() and .log_prob(). summary (if return_summary True): dictionary containing the accepted paramters (if kde True), distances and simulated data x of all populations. 
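Example (a minimal sketch; the toy prior, stand-in simulator, and budget numbers are illustrative assumptions, and SMCABC is assumed to be importable from sbi.inference ):

    import torch
    from sbi.inference import SMCABC
    from sbi.utils import BoxUniform

    prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))

    def simulator(theta):
        # Stand-in simulator: Gaussian noise around theta.
        return theta + 0.1 * torch.randn_like(theta)

    x_o = torch.zeros(1, 2)
    smc = SMCABC(simulator, prior, distance='l2', simulation_batch_size=100)
    particles, summary = smc(
        x_o,
        num_particles=1000,
        num_initial_pop=5000,
        num_simulations=20000,
        epsilon_decay=0.5,
        distance_based_decay=True,
        return_summary=True,
    )

With kde=True , a KDE object fitted on the final particles is returned instead of the raw particles.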
Source code in sbi/inference/abc/smcabc.py def __call__ ( self , x_o : Union [ Tensor , ndarray ], num_particles : int , num_initial_pop : int , num_simulations : int , epsilon_decay : float , distance_based_decay : bool = False , ess_min : Optional [ float ] = None , kernel_variance_scale : float = 1.0 , use_last_pop_samples : bool = True , return_summary : bool = False , kde : bool = False , kde_kwargs : Dict [ str , Any ] = {}, kde_sample_weights : bool = False , lra : bool = False , lra_with_weights : bool = False , sass : bool = False , sass_fraction : float = 0.25 , sass_expansion_degree : int = 1 , ) -> Union [ Tensor , KDEWrapper , Tuple [ Tensor , dict ], Tuple [ KDEWrapper , dict ]]: r \"\"\"Run SMCABC and return accepted parameters or KDE object fitted on them. Args: x_o: Observed data. num_particles: Number of particles in each population. num_initial_pop: Number of simulations used for initial population. num_simulations: Total number of possible simulations. epsilon_decay: Factor with which the acceptance threshold $\\epsilon$ decays. distance_based_decay: Whether the $\\epsilon$ decay is constant over populations or calculated from the previous populations distribution of distances. ess_min: Threshold of effective sampling size for resampling weights. Not used when None (default). kernel_variance_scale: Factor for scaling the perturbation kernel variance. use_last_pop_samples: Whether to fill up the current population with samples from the previous population when the budget is used up. If False, the current population is discarded and the previous population is returned. lra: Whether to run linear regression adjustment as in Beaumont et al. 2002 lra_with_weights: Whether to run lra as weighted linear regression with SMC weights sass: Whether to determine semi-automatic summary statistics as in Fearnhead & Prangle 2012. sass_fraction: Fraction of simulation budget used for the initial sass run. sass_expansion_degree: Degree of the polynomial feature expansion for the sass regression, default 1 - no expansion. kde: Whether to run KDE on the accepted parameters to return a KDE object from which one can sample. kde_kwargs: kwargs for performing KDE: 'bandwidth='; either a float, or a string naming a bandwidth heuristics, e.g., 'cv' (cross validation), 'silvermann' or 'scott', default 'cv'. 'transform': transform applied to the parameters before doing KDE. 'sample_weights': weights associated with samples. See 'get_kde' for more details kde_sample_weights: Whether perform weighted KDE with SMC weights or on raw particles. return_summary: Whether to return a dictionary with all accepted particles, weights, etc. at the end. Returns: theta (if kde False): accepted parameters of the last population. kde (if kde True): KDE object fitted on accepted parameters, from which one can .sample() and .log_prob(). summary (if return_summary True): dictionary containing the accepted paramters (if kde True), distances and simulated data x of all populations. \"\"\" pop_idx = 0 self . num_simulations = num_simulations # Pilot run for SASS. if sass : num_pilot_simulations = int ( sass_fraction * num_simulations ) self . logger . info ( f \"Running SASS with { num_pilot_simulations } pilot samples.\" ) sass_transform = self . run_sass_set_xo ( num_particles , num_pilot_simulations , x_o , lra , sass_expansion_degree ) # Udpate simulator and xo x_o = sass_transform ( self . x_o ) def sass_simulator ( theta ): self . simulation_counter += theta . shape [ 0 ] return sass_transform ( self . 
_batched_simulator ( theta )) self . _simulate_with_budget = sass_simulator # run initial population particles , epsilon , distances , x = self . _set_xo_and_sample_initial_population ( x_o , num_particles , num_initial_pop ) log_weights = torch . log ( 1 / num_particles * ones ( num_particles )) self . logger . info ( ( f \"population= { pop_idx } , eps= { epsilon } , ess= { 1.0 } , \" f \"num_sims= { num_initial_pop } \" ) ) all_particles = [ particles ] all_log_weights = [ log_weights ] all_distances = [ distances ] all_epsilons = [ epsilon ] all_x = [ x ] while self . simulation_counter < self . num_simulations : pop_idx += 1 # Decay based on quantile of distances from previous pop. if distance_based_decay : epsilon = self . _get_next_epsilon ( all_distances [ pop_idx - 1 ], epsilon_decay ) # Constant decay. else : epsilon *= epsilon_decay # Get kernel variance from previous pop. self . kernel_variance = self . get_kernel_variance ( all_particles [ pop_idx - 1 ], torch . exp ( all_log_weights [ pop_idx - 1 ]), samples_per_dim = 500 , kernel_variance_scale = kernel_variance_scale , ) particles , log_weights , distances , x = self . _sample_next_population ( particles = all_particles [ pop_idx - 1 ], log_weights = all_log_weights [ pop_idx - 1 ], distances = all_distances [ pop_idx - 1 ], epsilon = epsilon , x = all_x [ pop_idx - 1 ], use_last_pop_samples = use_last_pop_samples , ) # Resample population if effective sampling size is too small. if ess_min is not None : particles , log_weights = self . resample_if_ess_too_small ( particles , log_weights , ess_min , pop_idx ) self . logger . info ( ( f \"population= { pop_idx } done: eps= { epsilon : .6f } ,\" f \" num_sims= { self . simulation_counter } .\" ) ) # collect results all_particles . append ( particles ) all_log_weights . append ( log_weights ) all_distances . append ( distances ) all_epsilons . append ( epsilon ) all_x . append ( x ) # Maybe run LRA and adjust weights. if lra : self . logger . info ( \"Running Linear regression adjustment.\" ) adjusted_particles , adjusted_weights = self . run_lra_update_weights ( particles = all_particles [ - 1 ], xs = all_x [ - 1 ], observation = process_x ( x_o ), log_weights = all_log_weights [ - 1 ], lra_with_weights = lra_with_weights , ) final_particles = adjusted_particles else : final_particles = all_particles [ - 1 ] if kde : self . logger . info ( f \"\"\"KDE on { final_particles . shape [ 0 ] } samples with bandwidth option { kde_kwargs [ \"bandwidth\" ] if \"bandwidth\" in kde_kwargs else \"cv\" } . Beware that KDE can give unreliable results when used with too few samples and in high dimensions.\"\"\" ) # Maybe get particles weights from last population for weighted KDE. if kde_sample_weights : kde_kwargs [ \"sample_weights\" ] = all_log_weights [ - 1 ] . exp () kde_dist = get_kde ( final_particles , ** kde_kwargs ) if return_summary : return ( kde_dist , dict ( particles = all_particles , weights = all_log_weights , epsilons = all_epsilons , distances = all_distances , xs = all_x , ), ) else : return kde_dist if return_summary : return ( final_particles , dict ( particles = all_particles , weights = all_log_weights , epsilons = all_epsilons , distances = all_distances , xs = all_x , ), ) else : return final_particles __init__ ( self , simulator , prior , distance = 'l2' , num_workers = 1 , simulation_batch_size = 1 , show_progress_bars = True , kernel = 'gaussian' , algorithm_variant = 'C' ) special \u00b6 Sequential Monte Carlo Approximate Bayesian Computation. 
We distinguish between three different SMC methods here: - A: Toni et al. 2010 (Phd Thesis) - B: Sisson et al. 2007 (with correction from 2009) - C: Beaumont et al. 2009 In Toni et al. 2010 we find an overview of the differences on page 34: - B: same as A except for resampling of weights if the effective sampling size is too small. - C: same as A except for calculation of the covariance of the perturbation kernel: the kernel covariance is a scaled version of the covariance of the previous population. Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\mathrm{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required prior Distribution A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. required distance Union[str, Callable] Distance function to compare observed and simulated data. Can be a custom function or one of l1 , l2 , mse . 'l2' num_workers int Number of parallel workers to use for simulations. 1 simulation_batch_size int Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 1 show_progress_bars bool Whether to show a progressbar during simulation and sampling. True kernel Optional[str] Perturbation kernel. 'gaussian' algorithm_variant str Indicating the choice of algorithm variant, A, B, or C. 'C' Source code in sbi/inference/abc/smcabc.py def __init__ ( self , simulator : Callable , prior : Distribution , distance : Union [ str , Callable ] = \"l2\" , num_workers : int = 1 , simulation_batch_size : int = 1 , show_progress_bars : bool = True , kernel : Optional [ str ] = \"gaussian\" , algorithm_variant : str = \"C\" , ): r \"\"\"Sequential Monte Carlo Approximate Bayesian Computation. We distinguish between three different SMC methods here: - A: Toni et al. 2010 (Phd Thesis) - B: Sisson et al. 2007 (with correction from 2009) - C: Beaumont et al. 2009 In Toni et al. 2010 we find an overview of the differences on page 34: - B: same as A except for resampling of weights if the effective sampling size is too small. - C: same as A except for calculation of the covariance of the perturbation kernel: the kernel covariance is a scaled version of the covariance of the previous population. Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\mathrm{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. distance: Distance function to compare observed and simulated data. Can be a custom function or one of `l1`, `l2`, `mse`. num_workers: Number of parallel workers to use for simulations. simulation_batch_size: Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 
show_progress_bars: Whether to show a progressbar during simulation and sampling. kernel: Perturbation kernel. algorithm_variant: Indicating the choice of algorithm variant, A, B, or C. \"\"\" super () . __init__ ( simulator = simulator , prior = prior , distance = distance , num_workers = num_workers , simulation_batch_size = simulation_batch_size , show_progress_bars = show_progress_bars , ) kernels = ( \"gaussian\" , \"uniform\" ) assert ( kernel in kernels ), f \"Kernel ' { kernel } ' not supported. Choose one from { kernels } .\" self . kernel = kernel algorithm_variants = ( \"A\" , \"B\" , \"C\" ) assert algorithm_variant in algorithm_variants , ( f \"SMCABC variant ' { algorithm_variant } ' not supported, choose one from\" \" {algorithm_variants} .\" ) self . algorithm_variant = algorithm_variant self . distance_to_x0 = None self . simulation_counter = 0 self . num_simulations = 0 # Define simulator that keeps track of budget. def simulate_with_budget ( theta ): self . simulation_counter += theta . shape [ 0 ] return self . _batched_simulator ( theta ) self . _simulate_with_budget = simulate_with_budget get_distance_function ( distance_type = 'l2' ) inherited \u00b6 Return distance function for given distance type. Parameters: Name Type Description Default distance_type Union[str, Callable] string indicating the distance type, e.g., \u2018l2\u2019, \u2018l1\u2019, \u2018mse\u2019. Note that the returned distance function averages over the last dimension, e.g., over the summary statistics. 'l2' Returns: Type Description distance_fun distance function built from the passed string; if distance_type is already a callable, it is returned unchanged. Source code in sbi/inference/abc/smcabc.py @staticmethod def get_distance_function ( distance_type : Union [ str , Callable ] = \"l2\" ) -> Callable : \"\"\"Return distance function for given distance type. Args: distance_type: string indicating the distance type, e.g., 'l2', 'l1', 'mse'. Note that the returned distance function averages over the last dimension, e.g., over the summary statistics. Returns: distance_fun: distance function built from the passed string; if distance_type is already a callable, it is returned unchanged. \"\"\" if isinstance ( distance_type , Callable ): return distance_type distances = [ \"l1\" , \"l2\" , \"mse\" ] assert ( distance_type in distances ), f \" { distance_type } must be one of { distances } .\" if distance_type == \"mse\" : distance = lambda xo , x : torch . mean (( xo - x ) ** 2 , dim =- 1 ) elif distance_type == \"l2\" : distance = lambda xo , x : torch . norm (( xo - x ), dim =- 1 ) elif distance_type == \"l1\" : distance = lambda xo , x : torch . mean ( abs ( xo - x ), dim =- 1 ) else : raise ValueError ( r \"Distance {distance_type} not supported.\" ) def distance_fun ( observed_data : Tensor , simulated_data : Tensor ) -> Tensor : \"\"\"Return distance over batch dimension. Args: observed_data: Observed data, could be 1D. simulated_data: Batch of simulated data, has batch dimension. Returns: Torch tensor with batch of distances. \"\"\" assert simulated_data . ndim == 2 , \"simulated data needs batch dimension\" return distance ( observed_data , simulated_data ) return distance_fun get_new_kernel ( self , thetas ) \u00b6 Return new kernel distribution for a given set of parameters. Source code in sbi/inference/abc/smcabc.py def get_new_kernel ( self , thetas : Tensor ) -> Distribution : \"\"\"Return new kernel distribution for a given set of parameters.\"\"\" if self . kernel == \"gaussian\" : assert self . kernel_variance .
ndim == 2 return MultivariateNormal ( loc = thetas , covariance_matrix = self . kernel_variance ) elif self . kernel == \"uniform\" : low = thetas - self . kernel_variance high = thetas + self . kernel_variance # Move batch shape to event shape to get Uniform that is multivariate in # parameter dimension. return Uniform ( low = low , high = high ) . to_event ( 1 ) else : raise ValueError ( f \"Kernel, ' { self . kernel } ' not supported.\" ) get_particle_ranges ( self , particles , weights , samples_per_dim = 100 ) \u00b6 Return range of particles in each parameter dimension. Source code in sbi/inference/abc/smcabc.py def get_particle_ranges ( self , particles : Tensor , weights : Tensor , samples_per_dim : int = 100 ) -> Tensor : \"\"\"Return range of particles in each parameter dimension.\"\"\" # get weighted samples samples = self . sample_from_population_with_weights ( particles , weights , num_samples = samples_per_dim * particles . shape [ 1 ], ) # Variance spans the range of particles for every dimension. particle_ranges = samples . max ( 0 ) . values - samples . min ( 0 ) . values assert particle_ranges . ndim < 2 return particle_ranges get_sass_transform ( theta , x , expansion_degree = 1 , sample_weight = None ) inherited \u00b6 Return semi-automatic summary statistics function. Running weighted linear regression as in Fearnhead & Prangle 2012: https://arxiv.org/abs/1004.1112 Following implementation in https://abcpy.readthedocs.io/en/latest/_modules/abcpy/statistics.html#Identity and https://pythonhosted.org/abcpy/_modules/abcpy/summaryselections.html#Semiautomatic Source code in sbi/inference/abc/smcabc.py @staticmethod def get_sass_transform ( theta : torch . Tensor , x : torch . Tensor , expansion_degree : int = 1 , sample_weight = None , ) -> Callable : \"\"\"Return semi-automatic summary statistics function. Running weighted linear regression as in Fearnhead & Prangle 2012: https://arxiv.org/abs/1004.1112 Following implementation in https://abcpy.readthedocs.io/en/latest/_modules/abcpy/statistics.html#Identity and https://pythonhosted.org/abcpy/_modules/abcpy/summaryselections.html#Semiautomatic \"\"\" expansion = PolynomialFeatures ( degree = expansion_degree , include_bias = False ) # Transform x, remove intercept. x_expanded = expansion . fit_transform ( x ) sumstats_map = np . zeros (( x_expanded . shape [ 1 ], theta . shape [ 1 ])) for parameter_idx in range ( theta . shape [ 1 ]): regression_model = LinearRegression ( fit_intercept = True ) regression_model . fit ( X = x_expanded , y = theta [:, parameter_idx ], sample_weight = sample_weight ) sumstats_map [:, parameter_idx ] = regression_model . coef_ sumstats_map = torch . tensor ( sumstats_map , dtype = torch . float32 ) def sumstats_transform ( x ): x_expanded = torch . tensor ( expansion . fit_transform ( x ), dtype = torch . float32 ) return x_expanded . mm ( sumstats_map ) return sumstats_transform resample_if_ess_too_small ( self , particles , log_weights , ess_min , pop_idx ) \u00b6 Return resampled particles and uniform weights if effective sampling size is too small. Source code in sbi/inference/abc/smcabc.py def resample_if_ess_too_small ( self , particles : Tensor , log_weights : Tensor , ess_min : float , pop_idx : int , ) -> Tuple [ Tensor , Tensor ]: \"\"\"Return resampled particles and uniform weights if effective sampling size is too small. \"\"\" num_particles = particles . shape [ 0 ] ess = ( 1 / torch . sum ( torch .
resample_if_ess_too_small ( self , particles , log_weights , ess_min , pop_idx ) \u00b6 Return resampled particles and uniform weights if the effective sample size is too small. Source code in sbi/inference/abc/smcabc.py def resample_if_ess_too_small ( self , particles : Tensor , log_weights : Tensor , ess_min : float , pop_idx : int , ) -> Tuple [ Tensor , Tensor ]: \"\"\"Return resampled particles and uniform weights if the effective sample size is too small. \"\"\" num_particles = particles . shape [ 0 ] ess = ( 1 / torch . sum ( torch . exp ( 2.0 * log_weights ), dim = 0 )) / num_particles # Resampling of weights for low ESS only for Sisson et al. 2007. if ess < ess_min : self . logger . info ( f \"ESS= { ess : .2f } too low, resampling pop { pop_idx } ...\" ) # First resample, then set to uniform weights as in Sisson et al. 2007. particles = self . sample_from_population_with_weights ( particles , torch . exp ( log_weights ), num_samples = num_particles ) log_weights = torch . log ( 1 / num_particles * ones ( num_particles )) return particles , log_weights run_lra ( theta , x , observation , sample_weight = None ) inherited \u00b6 Return parameters adjusted with linear regression adjustment. Implementation as in Beaumont et al. 2002: https://arxiv.org/abs/1707.01254 Source code in sbi/inference/abc/smcabc.py @staticmethod def run_lra ( theta : torch . Tensor , x : torch . Tensor , observation : torch . Tensor , sample_weight = None , ) -> torch . Tensor : \"\"\"Return parameters adjusted with linear regression adjustment. Implementation as in Beaumont et al. 2002: https://arxiv.org/abs/1707.01254 \"\"\" theta_adjusted = theta for parameter_idx in range ( theta . shape [ 1 ]): regression_model = LinearRegression ( fit_intercept = True ) regression_model . fit ( X = x , y = theta [:, parameter_idx ], sample_weight = sample_weight , ) theta_adjusted [:, parameter_idx ] += regression_model . predict ( observation . reshape ( 1 , - 1 ) ) theta_adjusted [:, parameter_idx ] -= regression_model . predict ( x ) return theta_adjusted run_lra_update_weights ( self , particles , xs , observation , log_weights , lra_with_weights ) \u00b6 Return particles and weights adjusted with LRA. Runs (weighted) linear regression from xs onto particles to adjust the particles. Updates the SMC weights according to the new particles. Source code in sbi/inference/abc/smcabc.py def run_lra_update_weights ( self , particles : Tensor , xs : Tensor , observation : Tensor , log_weights : Tensor , lra_with_weights : bool , ) -> Tuple [ Tensor , Tensor ]: \"\"\"Return particles and weights adjusted with LRA. Runs (weighted) linear regression from xs onto particles to adjust the particles. Updates the SMC weights according to the new particles. \"\"\" adjusted_particles = self . run_lra ( theta = particles , x = xs , observation = observation , sample_weight = log_weights . exp () if lra_with_weights else None , ) # Update SMC weights with LRA adjusted weights adjusted_log_weights = self . _calculate_new_log_weights ( new_particles = adjusted_particles , old_particles = particles , old_log_weights = log_weights , ) return adjusted_particles , adjusted_log_weights run_sass_set_xo ( self , num_particles , num_pilot_simulations , x_o , lra = False , sass_expansion_degree = 1 ) \u00b6 Return transform for semi-automatic summary statistics. Runs a single round of rejection ABC with a fixed budget and accepts num_particles simulations to run the regression for SASS. Sets self.x_o once the x_shape can be derived from simulations. Source code in sbi/inference/abc/smcabc.py def run_sass_set_xo ( self , num_particles : int , num_pilot_simulations : int , x_o , lra : bool = False , sass_expansion_degree : int = 1 , ) -> Callable : \"\"\"Return transform for semi-automatic summary statistics. Runs a single round of rejection ABC with a fixed budget and accepts num_particles simulations to run the regression for SASS. Sets self.x_o once the x_shape can be derived from simulations. \"\"\" ( pilot_particles , _ , _ , pilot_xs , ) = self . 
_set_xo_and_sample_initial_population ( x_o , num_particles , num_pilot_simulations ) # Adjust with LRA. if lra : pilot_particles = self . run_lra ( pilot_particles , pilot_xs , self . x_o ) sass_transform = self . get_sass_transform ( pilot_particles , pilot_xs , expansion_degree = sass_expansion_degree , sample_weight = None , ) return sass_transform sample_from_population_with_weights ( particles , weights , num_samples = 1 ) staticmethod \u00b6 Return samples from particles sampled with weights. Source code in sbi/inference/abc/smcabc.py @staticmethod def sample_from_population_with_weights ( particles : Tensor , weights : Tensor , num_samples : int = 1 ) -> Tensor : \"\"\"Return samples from particles sampled with weights.\"\"\" # define multinomial with weights as probs multi = Multinomial ( probs = weights ) # sample num samples, with replacement samples = multi . sample ( sample_shape = torch . Size (( num_samples ,))) # get indices of success trials indices = torch . where ( samples )[ 1 ] # return those indices from trace return particles [ indices ] Posteriors \u00b6 sbi.inference.posteriors.direct_posterior.DirectPosterior ( NeuralPosterior ) \u00b6 Posterior \\(p(\\theta|x_o)\\) with log_prob() and sample() methods, only applicable to SNPE. SNPE trains a neural network to directly approximate the posterior distribution. However, for bounded priors, the neural network can have leakage: it puts non-zero mass in regions where the prior is zero. The DirectPosterior class wraps the trained network to deal with these cases. Specifically, this class offers the following functionality: - correct the calculation of the log probability such that it compensates for the leakage. - reject samples that lie outside of the prior bounds. This class can not be used in combination with SNLE or SNRE. default_x : Optional [ torch . Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. __init__ ( self , posterior_estimator , prior , max_sampling_batch_size = 10000 , device = None , x_shape = None , enable_transform = True ) special \u00b6 Parameters: Name Type Description Default prior Distribution Prior distribution with .log_prob() and .sample() . required posterior_estimator Flow The trained neural posterior. required max_sampling_batch_size int Batchsize of samples being drawn from the proposal at every iteration. 10000 device Optional[str] Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:0\u201d. If None, potential_fn.device is used. None x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None enable_transform bool Whether to transform parameters to unconstrained space during MAP optimization. When False, an identity transform will be returned for theta_transform . True Source code in sbi/inference/posteriors/direct_posterior.py def __init__ ( self , posterior_estimator : flows . Flow , prior : Distribution , max_sampling_batch_size : int = 10_000 , device : Optional [ str ] = None , x_shape : Optional [ torch . Size ] = None , enable_transform : bool = True , ): \"\"\" Args: prior: Prior distribution with `.log_prob()` and `.sample()`. posterior_estimator: The trained neural posterior. max_sampling_batch_size: Batchsize of samples being drawn from the proposal at every iteration. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:0\". If None, `potential_fn.device` is used. 
x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. enable_transform: Whether to transform parameters to unconstrained space during MAP optimization. When False, an identity transform will be returned for `theta_transform`. \"\"\" # Because `DirectPosterior` does not take the `potential_fn` as input, it # builds it itself. The `potential_fn` and `theta_transform` are used only for # obtaining the MAP. check_prior ( prior ) potential_fn , theta_transform = posterior_estimator_based_potential ( posterior_estimator , prior , x_o = None , enable_transform = enable_transform , ) super () . __init__ ( potential_fn = potential_fn , theta_transform = theta_transform , device = device , x_shape = x_shape , ) self . prior = prior self . posterior_estimator = posterior_estimator self . max_sampling_batch_size = max_sampling_batch_size self . _leakage_density_correction_factor = None self . _purpose = \"\"\"It samples the posterior network and rejects samples that lie outside of the prior bounds.\"\"\" leakage_correction ( self , x , num_rejection_samples = 10000 , force_update = False , show_progress_bars = False , rejection_sampling_batch_size = 10000 ) \u00b6 Return leakage correction factor for a leaky posterior density estimate. The factor is estimated from the acceptance probability during rejection sampling from the posterior. This is to avoid re-estimating the acceptance probability from scratch whenever log_prob is called and norm_posterior=True . Here, it is estimated only once for self.default_x and saved for later. We re-evaluate only whenever a new x is passed. Parameters: Name Type Description Default num_rejection_samples int Number of samples used to estimate correction factor. 10000 show_progress_bars bool Whether to show a progress bar during sampling. False rejection_sampling_batch_size int Batch size for rejection sampling. 10000 Returns: Type Description Tensor Saved or newly-estimated correction factor (as a scalar Tensor ). Source code in sbi/inference/posteriors/direct_posterior.py @torch . no_grad () def leakage_correction ( self , x : Tensor , num_rejection_samples : int = 10_000 , force_update : bool = False , show_progress_bars : bool = False , rejection_sampling_batch_size : int = 10_000 , ) -> Tensor : r \"\"\"Return leakage correction factor for a leaky posterior density estimate. The factor is estimated from the acceptance probability during rejection sampling from the posterior. This is to avoid re-estimating the acceptance probability from scratch whenever `log_prob` is called and `norm_posterior=True`. Here, it is estimated only once for `self.default_x` and saved for later. We re-evaluate only whenever a new `x` is passed. Arguments: num_rejection_samples: Number of samples used to estimate correction factor. show_progress_bars: Whether to show a progress bar during sampling. rejection_sampling_batch_size: Batch size for rejection sampling. Returns: Saved or newly-estimated correction factor (as a scalar `Tensor`). \"\"\" def acceptance_at ( x : Tensor ) -> Tensor : return accept_reject_sample ( proposal = self . posterior_estimator , accept_reject_fn = lambda theta : within_support ( self . 
prior , theta ), num_samples = num_rejection_samples , show_progress_bars = show_progress_bars , sample_for_correction_factor = True , max_sampling_batch_size = rejection_sampling_batch_size , proposal_sampling_kwargs = { \"context\" : x }, )[ 1 ] # Check if the provided x matches the default x (short-circuit on identity). is_new_x = self . default_x is None or ( x is not self . default_x and ( x != self . default_x ) . any () ) not_saved_at_default_x = self . _leakage_density_correction_factor is None if is_new_x : # Calculate at x; don't save. return acceptance_at ( x ) elif not_saved_at_default_x or force_update : # Calculate at default_x; save. assert self . default_x is not None self . _leakage_density_correction_factor = acceptance_at ( self . default_x ) return self . _leakage_density_correction_factor # type: ignore log_prob ( self , theta , x = None , norm_posterior = True , track_gradients = False , leakage_correction_params = None ) \u00b6 Returns the log-probability of the posterior \\(p(\\theta|x)\\) . Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required norm_posterior bool Whether to enforce a normalized posterior density. Renormalization of the posterior is useful when some probability falls out or leaks out of the prescribed prior support. The normalizing factor is calculated via rejection sampling, so if you need speedier but unnormalized log posterior estimates set here norm_posterior=False . The returned log posterior is set to -\u221e outside of the prior support regardless of this setting. True track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False leakage_correction_params Optional[dict] A dict of keyword arguments to override the default values of leakage_correction() . Possible options are: num_rejection_samples , force_update , show_progress_bars , and rejection_sampling_batch_size . These parameters only have an effect if norm_posterior=True . None Returns: Type Description Tensor (len(\u03b8),) -shaped log posterior probability \\(\\log p(\\theta|x)\\) for \u03b8 in the support of the prior, -\u221e (corresponding to 0 probability) outside. Source code in sbi/inference/posteriors/direct_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , norm_posterior : bool = True , track_gradients : bool = False , leakage_correction_params : Optional [ dict ] = None , ) -> Tensor : r \"\"\"Returns the log-probability of the posterior $p(\\theta|x)$. Args: theta: Parameters $\\theta$. norm_posterior: Whether to enforce a normalized posterior density. Renormalization of the posterior is useful when some probability falls out or leaks out of the prescribed prior support. The normalizing factor is calculated via rejection sampling, so if you need speedier but unnormalized log posterior estimates set here `norm_posterior=False`. The returned log posterior is set to -\u221e outside of the prior support regardless of this setting. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. leakage_correction_params: A `dict` of keyword arguments to override the default values of `leakage_correction()`. Possible options are: `num_rejection_samples`, `force_update`, `show_progress_bars`, and `rejection_sampling_batch_size`. These parameters only have an effect if `norm_posterior=True`. 
Returns: `(len(\u03b8),)`-shaped log posterior probability $\\log p(\\theta|x)$ for \u03b8 in the support of the prior, -\u221e (corresponding to 0 probability) outside. \"\"\" x = self . _x_else_default_x ( x ) # TODO Train exited here, entered after sampling? self . posterior_estimator . eval () theta = ensure_theta_batched ( torch . as_tensor ( theta )) theta_repeated , x_repeated = match_theta_and_x_batch_shapes ( theta , x ) with torch . set_grad_enabled ( track_gradients ): # Evaluate on device, move back to cpu for comparison with prior. unnorm_log_prob = self . posterior_estimator . log_prob ( theta_repeated , context = x_repeated ) # Force probability to be zero outside prior support. in_prior_support = within_support ( self . prior , theta_repeated ) masked_log_prob = torch . where ( in_prior_support , unnorm_log_prob , torch . tensor ( float ( \"-inf\" ), dtype = torch . float32 , device = self . _device ), ) if leakage_correction_params is None : leakage_correction_params = dict () # use defaults log_factor = ( log ( self . leakage_correction ( x = x , ** leakage_correction_params )) if norm_posterior else 0 ) return masked_log_prob - log_factor map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'posterior' , num_init_samples = 1000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[torch.Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 0.01 init_method Union[str, torch.Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'posterior' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 1000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. 
required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/direct_posterior.py def map ( self , x : Optional [ Tensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , Tensor ] = \"posterior\" , num_init_samples : int = 1_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. 
False Source code in sbi/inference/posteriors/direct_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , max_sampling_batch_size = 10000 , sample_with = None , show_progress_bars = True ) \u00b6 Return samples from posterior distribution \\(p(\\theta|x)\\) . Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw sample_shape.numel() samples and then reshape into the desired shape. torch.Size([]) sample_with Optional[str] This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. If it is set, we instantly raise an error. None show_progress_bars bool Whether to show sampling progress monitor. True Source code in sbi/inference/posteriors/direct_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , max_sampling_batch_size : int = 10_000 , sample_with : Optional [ str ] = None , show_progress_bars : bool = True , ) -> Tensor : r \"\"\"Return samples from posterior distribution $p(\\theta|x)$. Args: sample_shape: Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw `sample_shape.numel()` samples and then reshape into the desired shape. sample_with: This argument only exists to keep backward-compatibility with `sbi` v0.17.2 or older. If it is set, we instantly raise an error. show_progress_bars: Whether to show sampling progress monitor. \"\"\" num_samples = torch . Size ( sample_shape ) . numel () x = self . _x_else_default_x ( x ) max_sampling_batch_size = ( self . max_sampling_batch_size if max_sampling_batch_size is None else max_sampling_batch_size ) if sample_with is not None : raise ValueError ( f \"You set `sample_with= { sample_with } `. As of sbi v0.18.0, setting \" f \"`sample_with` is no longer supported. You have to rerun \" f \"`.build_posterior(sample_with= { sample_with } ).`\" ) samples = accept_reject_sample ( proposal = self . posterior_estimator , accept_reject_fn = lambda theta : within_support ( self . prior , theta ), num_samples = num_samples , show_progress_bars = show_progress_bars , max_sampling_batch_size = max_sampling_batch_size , proposal_sampling_kwargs = { \"context\" : x }, alternative_method = \"build_posterior(..., sample_with='mcmc')\" , )[ 0 ] return samples set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only \\(\\theta\\) needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p(\\theta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/direct_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self
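For orientation, the following is a minimal sketch of how a DirectPosterior is typically obtained and used. It assumes the standard SNPE workflow from the flexible-interface tutorial and a toy Gaussian simulator; shapes and sample counts are placeholders.

```python
import torch
from sbi.inference import SNPE
from sbi.utils import BoxUniform

prior = BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))

def simulator(theta):
    return theta + 0.1 * torch.randn_like(theta)  # toy simulator

theta = prior.sample((1000,))
x = simulator(theta)

inference = SNPE(prior=prior)
density_estimator = inference.append_simulations(theta, x).train()
posterior = inference.build_posterior(density_estimator)  # a DirectPosterior

x_o = torch.zeros(3)
posterior.set_default_x(x_o)            # chainable; see set_default_x above
samples = posterior.sample((500,))      # rejects samples outside the prior support
log_prob = posterior.log_prob(samples)  # leakage-corrected (norm_posterior=True)
```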
sbi.inference.posteriors.importance_posterior.ImportanceSamplingPosterior ( NeuralPosterior ) \u00b6 Provides importance sampling to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). ImportanceSamplingPosterior allows estimating the posterior log-probability by estimating the normalization constant with importance sampling. It also allows performing importance sampling (with .sample() ) and drawing approximate samples with sampling-importance-resampling (SIR) (with .sir_sample() ). default_x : Optional [ torch . Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. __init__ ( self , potential_fn , proposal , theta_transform = None , method = 'sir' , oversampling_factor = 32 , max_sampling_batch_size = 10000 , device = None , x_shape = None ) special \u00b6 Parameters: Name Type Description Default potential_fn Callable The potential function from which to draw samples. required proposal Any The proposal distribution. required theta_transform Optional[torch Transform] Transformation that is applied to parameters. Is not used during sampling, but only when calling .map() . None method str Either of [ sir | importance ]. This sets the behavior of the .sample() method. With sir , approximate posterior samples are generated with sampling importance resampling (SIR). With importance , the .sample() method returns a tuple of samples and corresponding importance weights. 'sir' oversampling_factor int Number of proposed samples from which only one is selected based on its importance weight. 32 max_sampling_batch_size int The batch size of samples being drawn from the proposal at every iteration. 10000 device Optional[str] Device on which to sample, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:0\u201d. 
If None, potential_fn.device is used. None x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None Source code in sbi/inference/posteriors/importance_posterior.py def __init__ ( self , potential_fn : Callable , proposal : Any , theta_transform : Optional [ TorchTransform ] = None , method : str = \"sir\" , oversampling_factor : int = 32 , max_sampling_batch_size : int = 10_000 , device : Optional [ str ] = None , x_shape : Optional [ torch . Size ] = None , ): \"\"\" Args: potential_fn: The potential function from which to draw samples. proposal: The proposal distribution. theta_transform: Transformation that is applied to parameters. Is not used during but only when calling `.map()`. method: Either of [`sir`|`importance`]. This sets the behavior of the `.sample()` method. With `sir`, approximate posterior samples are generated with sampling importance resampling (SIR). With `importance`, the `.sample()` method returns a tuple of samples and corresponding importance weights. oversampling_factor: Number of proposed samples from which only one is selected based on its importance weight. max_sampling_batch_size: The batch size of samples being drawn from the proposal at every iteration. device: Device on which to sample, e.g., \"cpu\", \"cuda\" or \"cuda:0\". If None, `potential_fn.device` is used. x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. \"\"\" super () . __init__ ( potential_fn , theta_transform = theta_transform , device = device , x_shape = x_shape , ) self . proposal = proposal self . _normalization_constant = None self . method = method self . oversampling_factor = oversampling_factor self . max_sampling_batch_size = max_sampling_batch_size self . _purpose = ( \"It provides sampling-importance resampling (SIR) to .sample() from the \" \"posterior and can evaluate the _unnormalized_ posterior density with \" \".log_prob().\" ) estimate_normalization_constant ( self , x , num_samples = 10000 , force_update = False ) \u00b6 Returns the normalization constant via importance sampling. Parameters: Name Type Description Default num_samples int Number of importance samples used for the estimate. 10000 force_update bool Whether to re-calculate the normlization constant when x is unchanged and have a cached value. False Source code in sbi/inference/posteriors/importance_posterior.py @torch . no_grad () def estimate_normalization_constant ( self , x : Tensor , num_samples : int = 10_000 , force_update : bool = False ) -> Tensor : \"\"\"Returns the normalization constant via importance sampling. Args: num_samples: Number of importance samples used for the estimate. force_update: Whether to re-calculate the normlization constant when x is unchanged and have a cached value. \"\"\" # Check if the provided x matches the default x (short-circuit on identity). is_new_x = self . default_x is None or ( x is not self . default_x and ( x != self . default_x ) . any () ) not_saved_at_default_x = self . _normalization_constant is None if is_new_x : # Calculate at x; don't save. _ , log_importance_weights = importance_sample ( self . potential_fn , proposal = self . proposal , num_samples = num_samples , ) return torch . mean ( torch . exp ( log_importance_weights )) elif not_saved_at_default_x or force_update : # Calculate at default_x; save. assert self . 
default_x is not None _ , log_importance_weights = importance_sample ( self . potential_fn , proposal = self . proposal , num_samples = num_samples , ) self . _normalization_constant = torch . mean ( torch . exp ( log_importance_weights )) return self . _normalization_constant . to ( self . _device ) # type: ignore log_prob ( self , theta , x = None , track_gradients = False , normalization_constant_params = None ) \u00b6 Returns the log-probability of theta under the posterior. The normalization constant is estimated with importance sampling. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False normalization_constant_params Optional[dict] Parameters passed on to estimate_normalization_constant() . None Returns: Type Description Tensor len($\\theta$) -shaped log-probability. Source code in sbi/inference/posteriors/importance_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False , normalization_constant_params : Optional [ dict ] = None , ) -> Tensor : r \"\"\"Returns the log-probability of theta under the posterior. The normalization constant is estimated with importance sampling. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. normalization_constant_params: Parameters passed on to `estimate_normalization_constant()`. Returns: `len($\\theta$)`-shaped log-probability. \"\"\" x = self . _x_else_default_x ( x ) self . potential_fn . set_x ( x ) theta = ensure_theta_batched ( torch . as_tensor ( theta )) with torch . set_grad_enabled ( track_gradients ): potential_values = self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) if normalization_constant_params is None : normalization_constant_params = dict () # use defaults normalization_constant = self . estimate_normalization_constant ( x , ** normalization_constant_params ) return ( potential_values - torch . log ( normalization_constant )) . to ( self . _device ) map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'proposal' , num_init_samples = 1000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[torch.Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 
0.01 init_method Union[str, torch.Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'proposal' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 1000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/importance_posterior.py def map ( self , x : Optional [ Tensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , Tensor ] = \"proposal\" , num_init_samples : int = 1_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. 
Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Source code in sbi/inference/posteriors/importance_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , oversampling_factor = 32 , max_sampling_batch_size = 10000 , sample_with = None ) \u00b6 Return samples from the approximate posterior distribution. Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Desired shape of samples that are drawn from the posterior. torch.Size([]) x Optional[torch.Tensor] Observation to condition on; if None, the default x is used. None Source code in sbi/inference/posteriors/importance_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , oversampling_factor : int = 32 , max_sampling_batch_size : int = 10_000 , sample_with : Optional [ str ] = None , ) -> Union [ Tensor , Tuple [ Tensor , Tensor ]]: \"\"\"Return samples from the approximate posterior distribution. Args: sample_shape: Desired shape of samples that are drawn from the posterior. x: Observation to condition on; if None, the default x is used. \"\"\" if sample_with is not None : raise ValueError ( f \"You set `sample_with= { sample_with } `. As of sbi v0.18.0, setting \" f \"`sample_with` is no longer supported. You have to rerun \" f \"`.build_posterior(sample_with= { sample_with } ).`\" ) self . potential_fn . set_x ( self . _x_else_default_x ( x )) if self . method == \"sir\" : return self . _sir_sample ( sample_shape , oversampling_factor = oversampling_factor , max_sampling_batch_size = max_sampling_batch_size , ) elif self . method == \"importance\" : return self . _importance_sample ( sample_shape ) else : raise NameError set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only \\(\\theta\\) needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p(\\theta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/importance_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self
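The posterior classes in this section can also be assembled by hand from a trained likelihood estimator via a potential function, as in the sampler-interface tutorial. The sketch below is illustrative only: the toy data and import paths are assumptions, and the same potential can be reused for the MCMCPosterior documented next.

```python
import torch
from sbi.inference import SNLE, ImportanceSamplingPosterior, MCMCPosterior
from sbi.inference import likelihood_estimator_based_potential
from sbi.utils import BoxUniform

prior = BoxUniform(low=-2 * torch.ones(2), high=2 * torch.ones(2))
theta = prior.sample((1000,))
x = theta + 0.1 * torch.randn_like(theta)  # toy simulator

likelihood_estimator = SNLE(prior=prior).append_simulations(theta, x).train()

x_o = torch.zeros(2)
potential_fn, theta_transform = likelihood_estimator_based_potential(
    likelihood_estimator, prior, x_o
)

# Sampling-importance-resampling (SIR) posterior, as documented in this section.
posterior_sir = ImportanceSamplingPosterior(potential_fn, proposal=prior, method="sir")
samples = posterior_sir.sample((100,))

# The same potential works with the MCMCPosterior documented below.
posterior_mcmc = MCMCPosterior(
    potential_fn,
    proposal=prior,
    theta_transform=theta_transform,
    method="slice_np_vectorized",
)
```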
sbi.inference.posteriors.mcmc_posterior.MCMCPosterior ( NeuralPosterior ) \u00b6 Provides MCMC to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). MCMCPosterior allows sampling from the posterior with MCMC. default_x : Optional [ torch . Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. mcmc_method : str property writable \u00b6 Returns MCMC method. posterior_sampler property readonly \u00b6 Returns the sampler created by .sample() . __init__ ( self , potential_fn , proposal , theta_transform = None , method = 'slice_np' , thin = 10 , warmup_steps = 10 , num_chains = 1 , init_strategy = 'resample' , init_strategy_parameters = {}, init_strategy_num_candidates = None , num_workers = 1 , device = None , x_shape = None ) special \u00b6 Parameters: Name Type Description Default potential_fn Callable The potential function from which to draw samples. required proposal Any Proposal distribution that is used to initialize the MCMC chain. required theta_transform Optional[torch Transform] Transformation that will be applied during sampling. Allows performing MCMC in unconstrained space. None method str Method used for MCMC sampling, one of slice_np , slice_np_vectorized , slice , hmc , nuts . slice_np is a custom numpy implementation of slice sampling. slice_np_vectorized is identical to slice_np , but if num_chains>1 , the chains are vectorized for slice_np_vectorized whereas they are run sequentially for slice_np . The samplers hmc , nuts or slice sample with Pyro. 'slice_np' thin int The thinning factor for the chain. 10 warmup_steps int The initial number of samples to discard. 10 num_chains int The number of chains. 
1 init_strategy str The initialisation strategy for chains; proposal will draw init locations from proposal , whereas sir will use Sequential- Importance-Resampling (SIR). SIR initially samples init_strategy_num_candidates from the proposal , evaluates all of them under the potential_fn and proposal , and then resamples the initial locations with weights proportional to exp(potential_fn - proposal.log_prob . resample is the same as sir but uses exp(potential_fn) as weights. 'resample' init_strategy_parameters Dict[str, Any] Dictionary of keyword arguments passed to the init strategy, e.g., for init_strategy=sir this could be num_candidate_samples , i.e., the number of candidates to to find init locations (internal default is 1000 ), or device . {} init_strategy_num_candidates Optional[int] Number of candidates to to find init locations in init_strategy=sir (deprecated, use init_strategy_parameters instead). None num_workers int number of cpu cores used to parallelize mcmc 1 device Optional[str] Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:0\u201d. If None, potential_fn.device is used. None x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None Source code in sbi/inference/posteriors/mcmc_posterior.py def __init__ ( self , potential_fn : Callable , proposal : Any , theta_transform : Optional [ TorchTransform ] = None , method : str = \"slice_np\" , thin : int = 10 , warmup_steps : int = 10 , num_chains : int = 1 , init_strategy : str = \"resample\" , init_strategy_parameters : Dict [ str , Any ] = {}, init_strategy_num_candidates : Optional [ int ] = None , num_workers : int = 1 , device : Optional [ str ] = None , x_shape : Optional [ torch . Size ] = None , ): \"\"\" Args: potential_fn: The potential function from which to draw samples. proposal: Proposal distribution that is used to initialize the MCMC chain. theta_transform: Transformation that will be applied during sampling. Allows to perform MCMC in unconstrained space. method: Method used for MCMC sampling, one of `slice_np`, `slice_np_vectorized`, `slice`, `hmc`, `nuts`. `slice_np` is a custom numpy implementation of slice sampling. `slice_np_vectorized` is identical to `slice_np`, but if `num_chains>1`, the chains are vectorized for `slice_np_vectorized` whereas they are run sequentially for `slice_np`. The samplers `hmc`, `nuts` or `slice` sample with Pyro. thin: The thinning factor for the chain. warmup_steps: The initial number of samples to discard. num_chains: The number of chains. init_strategy: The initialisation strategy for chains; `proposal` will draw init locations from `proposal`, whereas `sir` will use Sequential- Importance-Resampling (SIR). SIR initially samples `init_strategy_num_candidates` from the `proposal`, evaluates all of them under the `potential_fn` and `proposal`, and then resamples the initial locations with weights proportional to `exp(potential_fn - proposal.log_prob`. `resample` is the same as `sir` but uses `exp(potential_fn)` as weights. init_strategy_parameters: Dictionary of keyword arguments passed to the init strategy, e.g., for `init_strategy=sir` this could be `num_candidate_samples`, i.e., the number of candidates to to find init locations (internal default is `1000`), or `device`. init_strategy_num_candidates: Number of candidates to to find init locations in `init_strategy=sir` (deprecated, use init_strategy_parameters instead). 
num_workers: number of cpu cores used to parallelize mcmc device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:0\". If None, `potential_fn.device` is used. x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. \"\"\" super () . __init__ ( potential_fn , theta_transform = theta_transform , device = device , x_shape = x_shape , ) self . proposal = proposal self . method = method self . thin = thin self . warmup_steps = warmup_steps self . num_chains = num_chains self . init_strategy = init_strategy self . init_strategy_parameters = init_strategy_parameters self . num_workers = num_workers self . _posterior_sampler = None # Hardcode parameter name to reduce clutter kwargs. self . param_name = \"theta\" if init_strategy_num_candidates is not None : warn ( \"\"\"Passing `init_strategy_num_candidates` is deprecated as of sbi v0.19.0. Instead, use e.g., `init_strategy_parameters={\"num_candidate_samples\": 1000}`\"\"\" ) self . init_strategy_parameters [ \"num_candidate_samples\" ] = init_strategy_num_candidates self . potential_ = self . _prepare_potential ( method ) self . _purpose = ( \"It provides MCMC to .sample() from the posterior and \" \"can evaluate the _unnormalized_ posterior density with .log_prob().\" ) get_arviz_inference_data ( self ) \u00b6 Returns arviz InferenceData object constructed most recent samples. Note: the InferenceData is constructed using the posterior samples generated in most recent call to .sample(...) . For Pyro HMC and NUTS kernels InferenceData will contain diagnostics, for Pyro Slice or sbi slice sampling samples, only the samples are added. Returns: Type Description inference_data Arviz InferenceData object. Source code in sbi/inference/posteriors/mcmc_posterior.py def get_arviz_inference_data ( self ) -> InferenceData : \"\"\"Returns arviz InferenceData object constructed most recent samples. Note: the InferenceData is constructed using the posterior samples generated in most recent call to `.sample(...)`. For Pyro HMC and NUTS kernels InferenceData will contain diagnostics, for Pyro Slice or sbi slice sampling samples, only the samples are added. Returns: inference_data: Arviz InferenceData object. \"\"\" assert ( self . _posterior_sampler is not None ), \"\"\"No samples have been generated, call .sample() first.\"\"\" sampler : Union [ MCMC , SliceSamplerSerial , SliceSamplerVectorized ] = self . _posterior_sampler # If Pyro sampler and samples not transformed, use arviz' from_pyro. # Exclude 'slice' kernel as it lacks the 'divergence' diagnostics key. if isinstance ( self . _posterior_sampler , ( HMC , NUTS )) and isinstance ( self . theta_transform , torch_tf . IndependentTransform ): inference_data = az . from_pyro ( sampler ) # otherwise get samples from sampler and transform to original space. else : transformed_samples = sampler . get_samples ( group_by_chain = True ) # Pyro samplers returns dicts, get values. if isinstance ( transformed_samples , Dict ): # popitem gets last items, [1] get the values as tensor. transformed_samples = transformed_samples . popitem ()[ 1 ] # Our slice samplers return numpy arrays. elif isinstance ( transformed_samples , ndarray ): transformed_samples = torch . from_numpy ( transformed_samples ) . type ( torch . float32 ) # For MultipleIndependent priors transforms first dim must be batch dim. # thus, reshape back and forth to have batch dim in front. samples_shape = transformed_samples . shape samples = self . theta_transform . 
inv ( # type: ignore transformed_samples . reshape ( - 1 , samples_shape [ - 1 ]) ) . reshape ( # type: ignore * samples_shape ) inference_data = az . convert_to_inference_data ( { f \" { self . param_name } \" : samples } ) return inference_data log_prob ( self , theta , x = None , track_gradients = False ) \u00b6 Returns the log-probability of theta under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Returns: Type Description Tensor len($\\theta$) -shaped log-probability. Source code in sbi/inference/posteriors/mcmc_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Returns the log-probability of theta under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. Returns: `len($\\theta$)`-shaped log-probability. \"\"\" warn ( \"\"\"`.log_prob()` is deprecated for methods that can only evaluate the log-probability up to a normalizing constant. Use `.potential()` instead.\"\"\" ) warn ( \"The log-probability is unnormalized!\" ) self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'proposal' , num_init_samples = 1000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[torch.Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 0.01 init_method Union[str, torch.Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'proposal' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 1000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 
100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/mcmc_posterior.py def map ( self , x : Optional [ Tensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , Tensor ] = \"proposal\" , num_init_samples : int = 1_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. 
The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Source code in sbi/inference/posteriors/mcmc_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , method = None , thin = None , warmup_steps = None , num_chains = None , init_strategy = None , init_strategy_parameters = None , init_strategy_num_candidates = None , mcmc_parameters = {}, mcmc_method = None , sample_with = None , num_workers = None , show_progress_bars = True ) \u00b6 Return samples from posterior distribution \\(p(\\theta|x)\\) with MCMC. Check the __init__() method for a description of all arguments as well as their default values. Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw sample_shape.numel() samples and then reshape into the desired shape. torch.Size([]) mcmc_parameters Dict Dictionary that is passed only to support the API of sbi v0.17.2 or older. {} mcmc_method Optional[str] This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. Please use method instead. None sample_with Optional[str] This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. If it is set, we instantly raise an error. None show_progress_bars bool Whether to show sampling progress monitor. True Returns: Type Description Tensor Samples from posterior. Source code in sbi/inference/posteriors/mcmc_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , method : Optional [ str ] = None , thin : Optional [ int ] = None , warmup_steps : Optional [ int ] = None , num_chains : Optional [ int ] = None , init_strategy : Optional [ str ] = None , init_strategy_parameters : Optional [ Dict [ str , Any ]] = None , init_strategy_num_candidates : Optional [ int ] = None , mcmc_parameters : Dict = {}, mcmc_method : Optional [ str ] = None , sample_with : Optional [ str ] = None , num_workers : Optional [ int ] = None , show_progress_bars : bool = True , ) -> Tensor : r \"\"\"Return samples from posterior distribution $p(\\theta|x)$ with MCMC. Check the `__init__()` method for a description of all arguments as well as their default values. Args: sample_shape: Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw `sample_shape.numel()` samples and then reshape into the desired shape. 
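A hedged sketch of drawing MCMC samples with the arguments documented above. It assumes `posterior` is an `MCMCPosterior` whose default `x` has been set; the particular settings are illustrative, not recommendations:

samples = posterior.sample(
    (1_000,),                      # sample_shape: draw 1_000 posterior samples
    method="slice_np_vectorized",  # vectorized numpy slice sampling
    num_chains=20,
    thin=5,
    warmup_steps=100,
    show_progress_bars=True,
)
# samples has shape (1_000, parameter_dim)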
mcmc_parameters: Dictionary that is passed only to support the API of `sbi` v0.17.2 or older. mcmc_method: This argument only exists to keep backward-compatibility with `sbi` v0.17.2 or older. Please use `method` instead. sample_with: This argument only exists to keep backward-compatibility with `sbi` v0.17.2 or older. If it is set, we instantly raise an error. show_progress_bars: Whether to show sampling progress monitor. Returns: Samples from posterior. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) # Replace arguments that were not passed with their default. method = self . method if method is None else method thin = self . thin if thin is None else thin warmup_steps = self . warmup_steps if warmup_steps is None else warmup_steps num_chains = self . num_chains if num_chains is None else num_chains init_strategy = self . init_strategy if init_strategy is None else init_strategy num_workers = self . num_workers if num_workers is None else num_workers init_strategy_parameters = ( self . init_strategy_parameters if init_strategy_parameters is None else init_strategy_parameters ) if init_strategy_num_candidates is not None : warn ( \"\"\"Passing `init_strategy_num_candidates` is deprecated as of sbi v0.19.0. Instead, use e.g., `init_strategy_parameters={\"num_candidate_samples\": 1000}`\"\"\" ) self . init_strategy_parameters [ \"num_candidate_samples\" ] = init_strategy_num_candidates if sample_with is not None : raise ValueError ( f \"You set `sample_with= { sample_with } `. As of sbi v0.18.0, setting \" f \"`sample_with` is no longer supported. You have to rerun \" f \"`.build_posterior(sample_with= { sample_with } ).`\" ) if mcmc_method is not None : warn ( \"You passed `mcmc_method` to `.sample()`. As of sbi v0.18.0, this \" \"is deprecated and will be removed in a future release. Use `method` \" \"instead of `mcmc_method`.\" ) method = mcmc_method if mcmc_parameters : warn ( \"You passed `mcmc_parameters` to `.sample()`. As of sbi v0.18.0, this \" \"is deprecated and will be removed in a future release. Instead, pass \" \"the variable to `.sample()` directly, e.g. \" \"`posterior.sample((1,), num_chains=5)`.\" ) # The following lines are only for backwards compatibility with sbi v0.17.2 or # older. m_p = mcmc_parameters # define to shorten the variable name method = _maybe_use_dict_entry ( method , \"mcmc_method\" , m_p ) thin = _maybe_use_dict_entry ( thin , \"thin\" , m_p ) warmup_steps = _maybe_use_dict_entry ( warmup_steps , \"warmup_steps\" , m_p ) num_chains = _maybe_use_dict_entry ( num_chains , \"num_chains\" , m_p ) init_strategy = _maybe_use_dict_entry ( init_strategy , \"init_strategy\" , m_p ) self . potential_ = self . _prepare_potential ( method ) # type: ignore initial_params = self . _get_initial_params ( init_strategy , # type: ignore num_chains , # type: ignore num_workers , show_progress_bars , ** init_strategy_parameters , ) num_samples = torch . Size ( sample_shape ) . numel () track_gradients = method in ( \"hmc\" , \"nuts\" ) with torch . set_grad_enabled ( track_gradients ): if method in ( \"slice_np\" , \"slice_np_vectorized\" ): transformed_samples = self . _slice_np_mcmc ( num_samples = num_samples , potential_function = self . 
potential_ , initial_params = initial_params , thin = thin , # type: ignore warmup_steps = warmup_steps , # type: ignore vectorized = ( method == \"slice_np_vectorized\" ), num_workers = num_workers , show_progress_bars = show_progress_bars , ) elif method in ( \"hmc\" , \"nuts\" , \"slice\" ): transformed_samples = self . _pyro_mcmc ( num_samples = num_samples , potential_function = self . potential_ , initial_params = initial_params , mcmc_method = method , # type: ignore thin = thin , # type: ignore warmup_steps = warmup_steps , # type: ignore num_chains = num_chains , show_progress_bars = show_progress_bars , ) else : raise NameError samples = self . theta_transform . inv ( transformed_samples ) return samples . reshape (( * sample_shape , - 1 )) # type: ignore set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only $ heta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p( heta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/mcmc_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self set_mcmc_method ( self , method ) \u00b6 Sets sampling method to for MCMC and returns NeuralPosterior . Parameters: Name Type Description Default method str Method to use. required Returns: Type Description NeuralPosterior NeuralPosterior for chainable calls. Source code in sbi/inference/posteriors/mcmc_posterior.py def set_mcmc_method ( self , method : str ) -> \"NeuralPosterior\" : \"\"\"Sets sampling method to for MCMC and returns `NeuralPosterior`. Args: method: Method to use. Returns: `NeuralPosterior` for chainable calls. \"\"\" self . 
_mcmc_method = method return self sbi.inference.posteriors.rejection_posterior.RejectionPosterior ( NeuralPosterior ) \u00b6 Provides rejection sampling to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). RejectionPosterior allows to sample from the posterior with rejection sampling. default_x : Optional [ torch . Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. __init__ ( self , potential_fn , proposal , theta_transform = None , max_sampling_batch_size = 10000 , num_samples_to_find_max = 10000 , num_iter_to_find_max = 100 , m = 1.2 , device = None , x_shape = None ) special \u00b6 Parameters: Name Type Description Default potential_fn Callable The potential function from which to draw samples. required proposal Any The proposal distribution. required theta_transform Optional[torch Transform] Transformation that is applied to parameters. Is not used during but only when calling .map() . None max_sampling_batch_size int The batchsize of samples being drawn from the proposal at every iteration. 10000 num_samples_to_find_max int The number of samples that are used to find the maximum of the potential_fn / proposal ratio. 10000 num_iter_to_find_max int The number of gradient ascent iterations to find the maximum of the potential_fn / proposal ratio. 100 m float Multiplier to the potential_fn / proposal ratio. 1.2 device Optional[str] Training device, e.g., \u201ccpu\u201d, \u201ccuda\u201d or \u201ccuda:0\u201d. If None, potential_fn.device is used. None x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None Source code in sbi/inference/posteriors/rejection_posterior.py def __init__ ( self , potential_fn : Callable , proposal : Any , theta_transform : Optional [ TorchTransform ] = None , max_sampling_batch_size : int = 10_000 , num_samples_to_find_max : int = 10_000 , num_iter_to_find_max : int = 100 , m : float = 1.2 , device : Optional [ str ] = None , x_shape : Optional [ torch . Size ] = None , ): \"\"\" Args: potential_fn: The potential function from which to draw samples. proposal: The proposal distribution. theta_transform: Transformation that is applied to parameters. Is not used during but only when calling `.map()`. max_sampling_batch_size: The batchsize of samples being drawn from the proposal at every iteration. num_samples_to_find_max: The number of samples that are used to find the maximum of the `potential_fn / proposal` ratio. num_iter_to_find_max: The number of gradient ascent iterations to find the maximum of the `potential_fn / proposal` ratio. m: Multiplier to the `potential_fn / proposal` ratio. device: Training device, e.g., \"cpu\", \"cuda\" or \"cuda:0\". If None, `potential_fn.device` is used. x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. \"\"\" super () . __init__ ( potential_fn , theta_transform = theta_transform , device = device , x_shape = x_shape , ) self . proposal = proposal self . max_sampling_batch_size = max_sampling_batch_size self . num_samples_to_find_max = num_samples_to_find_max self . num_iter_to_find_max = num_iter_to_find_max self . m = m self . 
_purpose = ( \"It provides rejection sampling to .sample() from the posterior and \" \"can evaluate the _unnormalized_ posterior density with .log_prob().\" ) log_prob ( self , theta , x = None , track_gradients = False ) \u00b6 Returns the log-probability of theta under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Returns: Type Description Tensor len($\\theta$) -shaped log-probability. Source code in sbi/inference/posteriors/rejection_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Returns the log-probability of theta under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. Returns: `len($\\theta$)`-shaped log-probability. \"\"\" warn ( \"`.log_prob()` is deprecated for methods that can only evaluate the log-probability up to a normalizing constant. Use `.potential()` instead.\" ) warn ( \"The log-probability is unnormalized!\" ) self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'proposal' , num_init_samples = 1000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[torch.Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 0.01 init_method Union[str, torch.Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'proposal' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 1000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. 
Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/rejection_posterior.py def map ( self , x : Optional [ Tensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , Tensor ] = \"proposal\" , num_init_samples : int = 1_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . 
required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Source code in sbi/inference/posteriors/rejection_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , max_sampling_batch_size = None , num_samples_to_find_max = None , num_iter_to_find_max = None , m = None , sample_with = None , show_progress_bars = True ) \u00b6 Return samples from posterior \\(p(\\theta|x)\\) via rejection sampling. Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw sample_shape.numel() samples and then reshape into the desired shape. torch.Size([]) sample_with Optional[str] This argument only exists to keep backward-compatibility with sbi v0.17.2 or older. If it is set, we instantly raise an error. None show_progress_bars bool Whether to show sampling progress monitor. True Returns: Type Description Samples from posterior. Source code in sbi/inference/posteriors/rejection_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , max_sampling_batch_size : Optional [ int ] = None , num_samples_to_find_max : Optional [ int ] = None , num_iter_to_find_max : Optional [ int ] = None , m : Optional [ float ] = None , sample_with : Optional [ str ] = None , show_progress_bars : bool = True , ): r \"\"\"Return samples from posterior $p(\\theta|x)$ via rejection sampling. Args: sample_shape: Desired shape of samples that are drawn from posterior. If sample_shape is multidimensional we simply draw `sample_shape.numel()` samples and then reshape into the desired shape. sample_with: This argument only exists to keep backward-compatibility with `sbi` v0.17.2 or older. If it is set, we instantly raise an error. show_progress_bars: Whether to show sampling progress monitor. Returns: Samples from posterior. \"\"\" num_samples = torch . Size ( sample_shape ) . numel () self . potential_fn . set_x ( self . _x_else_default_x ( x )) potential = partial ( self . potential_fn , track_gradients = True ) if sample_with is not None : raise ValueError ( f \"You set `sample_with= { sample_with } `. As of sbi v0.18.0, setting \" f \"`sample_with` is no longer supported. You have to rerun \" f \"`.build_posterior(sample_with= { sample_with } ).`\" ) # Replace arguments that were not passed with their default. max_sampling_batch_size = ( self . max_sampling_batch_size if max_sampling_batch_size is None else max_sampling_batch_size ) num_samples_to_find_max = ( self . num_samples_to_find_max if num_samples_to_find_max is None else num_samples_to_find_max ) num_iter_to_find_max = ( self . 
num_iter_to_find_max if num_iter_to_find_max is None else num_iter_to_find_max ) m = self . m if m is None else m samples , _ = rejection_sample ( potential , proposal = self . proposal , num_samples = num_samples , show_progress_bars = show_progress_bars , warn_acceptance = 0.01 , max_sampling_batch_size = max_sampling_batch_size , num_samples_to_find_max = num_samples_to_find_max , num_iter_to_find_max = num_iter_to_find_max , m = m , device = self . _device , ) return samples . reshape (( * sample_shape , - 1 )) set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only $ heta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p( heta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/rejection_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self sbi.inference.posteriors.vi_posterior.VIPosterior ( NeuralPosterior ) \u00b6 Provides VI (Variational Inference) to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). VIPosterior allows to learn a tractable variational posterior \\(q(\\theta)\\) which approximates the true posterior \\(p(\\theta|x_o)\\) . After this second training stage, we can produce approximate posterior samples, by just sampling from q with no additional cost. For additional information see [1] and [2]. References: [1] Variational methods for simulation-based inference, Manuel Gl\u00f6ckler, Michael Deistler, Jakob Macke, 2022, https://openreview.net/forum?id=kZ0UYdhqkNY [2] Sequential Neural Posterior and Likelihood Approximation, Samuel Wiqvist, Jes Frellsen, Umberto Picchini, 2021, https://arxiv.org/abs/2102.06522 default_x : Optional [ torch . 
Tensor ] inherited property writable \u00b6 Return default x used by .sample(), .log_prob as conditioning context. q : Distribution property writable \u00b6 Returns the variational posterior. vi_method : str property writable \u00b6 Variational inference method e.g. one of [rKL, fKL, IW, alpha]. __init__ ( self , potential_fn , prior = None , q = 'maf' , theta_transform = None , vi_method = 'rKL' , device = 'cpu' , x_shape = None , parameters = [], modules = []) special \u00b6 Parameters: Name Type Description Default potential_fn Callable The potential function from which to draw samples. required prior Optional[torch Distribution] This is the prior distribution. Note that this is only used to check/construct the variational distribution or within some quality metrics. Please make sure that this matches with the prior within the potential_fn. If None is given, we will try to infer it from potential_fn or q, if this fails we raise an Error. None q Union[str, pyro.distributions.torch.TransformedDistribution, VIPosterior, Callable] Variational distribution, either string, TransformedDistribution , or a VIPosterior object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. You can also specify your own variational family by passing a pyro TransformedDistribution . Additionally, we allow a Callable , which allows you the pass a builder function, which if called returns a distribution. This may be useful for setting the hyperparameters e.g. num_transfroms within the get_flow_builder method specifying the number of transformations within a normalizing flow. If q is already a VIPosterior , then the arguments will be copied from it (relevant for multi-round training). 'maf' theta_transform Optional[torch Transform] Maps form prior support to unconstrained space. The inverse is used here to ensure that the posterior support is equal to that of the prior. None vi_method str This specifies the variational methods which are used to fit q to the posterior. We currently support [rKL, fKL, IW, alpha]. Note that some of the divergences are mode seeking i.e. they underestimate variance and collapse on multimodal targets ( rKL , alpha for alpha > 1) and some are mass covering i.e. they overestimate variance but typically cover all modes ( fKL , IW , alpha for alpha < 1). 'rKL' device str Training device, e.g., cpu , cuda or cuda:0 . We will ensure that all other objects are also on this device. 'cpu' x_shape Optional[torch.Size] Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. None parameters Iterable List of parameters of the variational posterior. This is only required for user-defined q i.e. if q does not have a parameters attribute. [] modules Iterable List of modules of the variational posterior. This is only required for user-defined q i.e. if q does not have a modules attribute. [] Source code in sbi/inference/posteriors/vi_posterior.py def __init__ ( self , potential_fn : Callable , prior : Optional [ TorchDistribution ] = None , q : Union [ str , PyroTransformedDistribution , \"VIPosterior\" , Callable ] = \"maf\" , theta_transform : Optional [ TorchTransform ] = None , vi_method : str = \"rKL\" , device : str = \"cpu\" , x_shape : Optional [ torch . 
Size ] = None , parameters : Iterable = [], modules : Iterable = [], ): \"\"\" Args: potential_fn: The potential function from which to draw samples. prior: This is the prior distribution. Note that this is only used to check/construct the variational distribution or within some quality metrics. Please make sure that this matches with the prior within the potential_fn. If `None` is given, we will try to infer it from potential_fn or q, if this fails we raise an Error. q: Variational distribution, either string, `TransformedDistribution`, or a `VIPosterior` object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. You can also specify your own variational family by passing a pyro `TransformedDistribution`. Additionally, we allow a `Callable`, which allows you the pass a `builder` function, which if called returns a distribution. This may be useful for setting the hyperparameters e.g. `num_transfroms` within the `get_flow_builder` method specifying the number of transformations within a normalizing flow. If q is already a `VIPosterior`, then the arguments will be copied from it (relevant for multi-round training). theta_transform: Maps form prior support to unconstrained space. The inverse is used here to ensure that the posterior support is equal to that of the prior. vi_method: This specifies the variational methods which are used to fit q to the posterior. We currently support [rKL, fKL, IW, alpha]. Note that some of the divergences are `mode seeking` i.e. they underestimate variance and collapse on multimodal targets (`rKL`, `alpha` for alpha > 1) and some are `mass covering` i.e. they overestimate variance but typically cover all modes (`fKL`, `IW`, `alpha` for alpha < 1). device: Training device, e.g., `cpu`, `cuda` or `cuda:0`. We will ensure that all other objects are also on this device. x_shape: Shape of a single simulator output. If passed, it is used to check the shape of the observed data and give a descriptive error. parameters: List of parameters of the variational posterior. This is only required for user-defined q i.e. if q does not have a `parameters` attribute. modules: List of modules of the variational posterior. This is only required for user-defined q i.e. if q does not have a `modules` attribute. \"\"\" super () . __init__ ( potential_fn , theta_transform , device , x_shape = x_shape ) # Especially the prior may be on another device -> move it... self . _device = device self . potential_fn . device = device move_all_tensor_to_device ( self . potential_fn , device ) # Get prior and previous builds if prior is not None : self . _prior = prior elif hasattr ( self . potential_fn , \"prior\" ) and isinstance ( self . potential_fn . prior , Distribution ): self . _prior = self . potential_fn . prior elif isinstance ( q , VIPosterior ) and isinstance ( q . _prior , Distribution ): self . _prior = q . _prior else : raise ValueError ( \"We could not find a suitable prior distribution within `potential_fn`\" \"or `q` (if a VIPosterior is given). Please explicitly specify a prior.\" ) move_all_tensor_to_device ( self . _prior , device ) self . _optimizer = None # In contrast to MCMC we want to project into constrained space. if theta_transform is None : self . link_transform = mcmc_transform ( self . _prior ) . inv else : self . link_transform = theta_transform . 
inv # This will set the variational distribution and VI method self . set_q ( q , parameters = parameters , modules = modules ) self . set_vi_method ( vi_method ) self . _purpose = ( \"It provides Variational inference to .sample() from the posterior and \" \"can evaluate the _normalized_ posterior density with .log_prob().\" ) evaluate ( self , quality_control_metric = 'psis' , N = 50000 ) \u00b6 This function will evaluate the quality of the variational posterior distribution. We currently support two different metrics of type psis , which checks the quality based on the tails of importance weights (there should not be much with a large one), or prop which checks the proportionality between q and potential_fn. NOTE: In our experience prop is sensitive to distinguish good from ok whereas psis is more sensitive in distinguishing very bad from ok . Parameters: Name Type Description Default quality_control_metric str The metric of choice, we currently support [psis, prop, prop_prior]. 'psis' N int Number of samples which is used to evaluate the metric. 50000 Source code in sbi/inference/posteriors/vi_posterior.py def evaluate ( self , quality_control_metric : str = \"psis\" , N : int = int ( 5e4 )) -> None : \"\"\"This function will evaluate the quality of the variational posterior distribution. We currently support two different metrics of type `psis`, which checks the quality based on the tails of importance weights (there should not be much with a large one), or `prop` which checks the proportionality between q and potential_fn. NOTE: In our experience `prop` is sensitive to distinguish ``good`` from ``ok`` whereas `psis` is more sensitive in distinguishing `very bad` from `ok`. Args: quality_control_metric: The metric of choice, we currently support [psis, prop, prop_prior]. N: Number of samples which is used to evaluate the metric. \"\"\" quality_control_fn , quality_control_msg = get_quality_metric ( quality_control_metric ) metric = round ( float ( quality_control_fn ( self , N = N )), 3 ) print ( f \"Quality Score: { metric } \" + quality_control_msg ) log_prob ( self , theta , x = None , track_gradients = False ) \u00b6 Returns the log-probability of theta under the variational posterior. Parameters: Name Type Description Default theta Tensor Parameters required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis but increases memory consumption. False Returns: Type Description Tensor len($\\theta$) -shaped log-probability. Source code in sbi/inference/posteriors/vi_posterior.py def log_prob ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False , ) -> Tensor : r \"\"\"Returns the log-probability of theta under the variational posterior. Args: theta: Parameters track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis but increases memory consumption. Returns: `len($\\theta$)`-shaped log-probability. \"\"\" x = self . _x_else_default_x ( x ) if self . _trained_on is None or ( x != self . _trained_on ) . all (): raise AttributeError ( f \"The variational posterior was not fit using observation { x } . \\ Please train.\" ) with torch . set_grad_enabled ( track_gradients ): theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . q . 
log_prob ( theta ) map ( self , x = None , num_iter = 1000 , num_to_optimize = 100 , learning_rate = 0.01 , init_method = 'proposal' , num_init_samples = 10000 , save_best_every = 10 , show_progress_bars = False , force_update = False ) \u00b6 Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in self._map and can be accessed with self.map() . The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. For developers: if the prior is a BoxUniform , we carry out the optimization in unbounded space and transform the result back into bounded space. Parameters: Name Type Description Default x Optional[Tensor] Deprecated - use .set_default_x() prior to .map() . None num_iter int Number of optimization steps that the algorithm takes to find the MAP. 1000 learning_rate float Learning rate of the optimizer. 0.01 init_method Union[str, Tensor] How to select the starting parameters for the optimization. If it is a string, it can be either [ posterior , prior ], which samples the respective distribution num_init_samples times. If it is a tensor, the tensor will be used as init locations. 'proposal' num_init_samples int Draw this number of samples from the posterior and evaluate the log-probability of all of them. 10000 num_to_optimize int From the drawn num_init_samples , use the num_to_optimize with highest log-probability as the initial points for the optimization. 100 save_best_every int The best log-probability is computed, saved in the map -attribute, and printed every save_best_every -th iteration. Computing the best log-probability creates a significant overhead (thus, the default is 10 .) 10 show_progress_bars bool Whether to show a progressbar during sampling from the posterior. False force_update bool Whether to re-calculate the MAP when x is unchanged and have a cached value. False log_prob_kwargs Will be empty for SNLE and SNRE. Will contain {\u2018norm_posterior\u2019: True} for SNPE. required Returns: Type Description Tensor The MAP estimate. Source code in sbi/inference/posteriors/vi_posterior.py def map ( self , x : Optional [ TorchTensor ] = None , num_iter : int = 1_000 , num_to_optimize : int = 100 , learning_rate : float = 0.01 , init_method : Union [ str , TorchTensor ] = \"proposal\" , num_init_samples : int = 10_000 , save_best_every : int = 10 , show_progress_bars : bool = False , force_update : bool = False , ) -> Tensor : r \"\"\"Returns the maximum-a-posteriori estimate (MAP). The method can be interrupted (Ctrl-C) when the user sees that the log-probability converges. The best estimate will be saved in `self._map` and can be accessed with `self.map()`. The MAP is obtained by running gradient ascent from a given number of starting positions (samples from the posterior with the highest log-probability). After the optimization is done, we select the parameter set that has the highest log-probability after the optimization. Warning: The default values used by this function are not well-tested. They might require hand-tuning for the problem at hand. 
For developers: if the prior is a `BoxUniform`, we carry out the optimization in unbounded space and transform the result back into bounded space. Args: x: Deprecated - use `.set_default_x()` prior to `.map()`. num_iter: Number of optimization steps that the algorithm takes to find the MAP. learning_rate: Learning rate of the optimizer. init_method: How to select the starting parameters for the optimization. If it is a string, it can be either [`posterior`, `prior`], which samples the respective distribution `num_init_samples` times. If it is a tensor, the tensor will be used as init locations. num_init_samples: Draw this number of samples from the posterior and evaluate the log-probability of all of them. num_to_optimize: From the drawn `num_init_samples`, use the `num_to_optimize` with highest log-probability as the initial points for the optimization. save_best_every: The best log-probability is computed, saved in the `map`-attribute, and printed every `save_best_every`-th iteration. Computing the best log-probability creates a significant overhead (thus, the default is `10`.) show_progress_bars: Whether to show a progressbar during sampling from the posterior. force_update: Whether to re-calculate the MAP when x is unchanged and have a cached value. log_prob_kwargs: Will be empty for SNLE and SNRE. Will contain {'norm_posterior': True} for SNPE. Returns: The MAP estimate. \"\"\" self . proposal = self . q return super () . map ( x = x , num_iter = num_iter , num_to_optimize = num_to_optimize , learning_rate = learning_rate , init_method = init_method , num_init_samples = num_init_samples , save_best_every = save_best_every , show_progress_bars = show_progress_bars , force_update = force_update , ) potential ( self , theta , x = None , track_gradients = False ) inherited \u00b6 Evaluates \\(\\theta\\) under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of \\(\\theta\\) under the posterior. Parameters: Name Type Description Default theta Tensor Parameters \\(\\theta\\) . required track_gradients bool Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. False Source code in sbi/inference/posteriors/vi_posterior.py def potential ( self , theta : Tensor , x : Optional [ Tensor ] = None , track_gradients : bool = False ) -> Tensor : r \"\"\"Evaluates $\\theta$ under the potential that is used to sample the posterior. The potential is the unnormalized log-probability of $\\theta$ under the posterior. Args: theta: Parameters $\\theta$. track_gradients: Whether the returned tensor supports tracking gradients. This can be helpful for e.g. sensitivity analysis, but increases memory consumption. \"\"\" self . potential_fn . set_x ( self . _x_else_default_x ( x )) theta = ensure_theta_batched ( torch . as_tensor ( theta )) return self . potential_fn ( theta . to ( self . _device ), track_gradients = track_gradients ) sample ( self , sample_shape = torch . Size ([]), x = None , ** kwargs ) \u00b6 Samples from the variational posterior distribution. Parameters: Name Type Description Default sample_shape Union[torch.Size, Tuple[int, ...]] Shape of samples torch.Size([]) Returns: Type Description Tensor Samples from posterior. Source code in sbi/inference/posteriors/vi_posterior.py def sample ( self , sample_shape : Shape = torch . Size (), x : Optional [ Tensor ] = None , ** kwargs , ) -> Tensor : \"\"\"Samples from the variational posterior distribution. 
Args: sample_shape: Shape of samples Returns: Samples from posterior. \"\"\" x = self . _x_else_default_x ( x ) if self . _trained_on is None or ( x != self . _trained_on ) . all (): raise AttributeError ( f \"The variational posterior was not fit on the specified `default_x` \" f \" { x } . Please train using `posterior.train()`.\" ) samples = self . q . sample ( torch . Size ( sample_shape )) return samples . reshape (( * sample_shape , samples . shape [ - 1 ])) set_default_x ( self , x ) inherited \u00b6 Set new default x for .sample(), .log_prob to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify x in calls to .sample() and .log_prob() - only $ heta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular x=x_o (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like posterior.set_default_x(my_x).sample(mytheta) are possible. Parameters: Name Type Description Default x Tensor The default observation to set for the posterior \\(p( heta|x)\\) . required Returns: Type Description NeuralPosterior NeuralPosterior that will use a default x when not explicitly passed. Source code in sbi/inference/posteriors/vi_posterior.py def set_default_x ( self , x : Tensor ) -> \"NeuralPosterior\" : \"\"\"Set new default x for `.sample(), .log_prob` to use as conditioning context. Reset the MAP stored for the old default x if applicable. This is a pure convenience to avoid having to repeatedly specify `x` in calls to `.sample()` and `.log_prob()` - only $\\theta$ needs to be passed. This convenience is particularly useful when the posterior is focused, i.e. has been trained over multiple rounds to be accurate in the vicinity of a particular `x=x_o` (you can check if your posterior object is focused by printing it). NOTE: this method is chainable, i.e. will return the NeuralPosterior object so that calls like `posterior.set_default_x(my_x).sample(mytheta)` are possible. Args: x: The default observation to set for the posterior $p(\\theta|x)$. Returns: `NeuralPosterior` that will use a default `x` when not explicitly passed. \"\"\" self . _x = process_x ( x , x_shape = self . _x_shape , allow_iid_x = self . potential_fn . allow_iid_x ) . to ( self . _device ) self . _map = None return self set_q ( self , q , parameters = [], modules = []) \u00b6 Defines the variational family. You can specify over which parameters/modules we optimize. This is required for custom distributions which e.g. do not inherit nn.Modules or has the function parameters or modules to give direct access to trainable parameters. Further, you can pass a function, which constructs a variational distribution if called. Parameters: Name Type Description Default q Union[str, pyro.distributions.torch.TransformedDistribution, VIPosterior, Callable] Variational distribution, either string, distribution, or a VIPosterior object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. Of course, you can also specify your own variational family by passing a parameterized distribution object i.e. 
a torch.distributions Distribution with methods parameters returning an iterable of all parameters (you can pass them within the paramters/modules attribute). Additionally, we allow a Callable , which allows you the pass a builder function, which if called returns an distribution. This may be useful for setting the hyperparameters e.g. num_transfroms:int by using the get_flow_builder method specifying the hyperparameters. If q is already a VIPosterior , then the arguments will be copied from it (relevant for multi-round training). required parameters Iterable List of parameters associated with the distribution object. [] modules Iterable List of modules associated with the distribution object. [] Source code in sbi/inference/posteriors/vi_posterior.py def set_q ( self , q : Union [ str , PyroTransformedDistribution , \"VIPosterior\" , Callable ], parameters : Iterable = [], modules : Iterable = [], ) -> None : \"\"\"Defines the variational family. You can specify over which parameters/modules we optimize. This is required for custom distributions which e.g. do not inherit nn.Modules or has the function `parameters` or `modules` to give direct access to trainable parameters. Further, you can pass a function, which constructs a variational distribution if called. Args: q: Variational distribution, either string, distribution, or a VIPosterior object. This specifies a parametric class of distribution over which the best possible posterior approximation is searched. For string input, we currently support [nsf, scf, maf, mcf, gaussian, gaussian_diag]. Of course, you can also specify your own variational family by passing a `parameterized` distribution object i.e. a torch.distributions Distribution with methods `parameters` returning an iterable of all parameters (you can pass them within the paramters/modules attribute). Additionally, we allow a `Callable`, which allows you the pass a `builder` function, which if called returns an distribution. This may be useful for setting the hyperparameters e.g. `num_transfroms:int` by using the `get_flow_builder` method specifying the hyperparameters. If q is already a `VIPosterior`, then the arguments will be copied from it (relevant for multi-round training). parameters: List of parameters associated with the distribution object. modules: List of modules associated with the distribution object. \"\"\" self . _q_arg = q if isinstance ( q , Distribution ): q = adapt_variational_distribution ( q , self . _prior , self . link_transform , parameters = parameters , modules = modules , ) make_object_deepcopy_compatible ( q ) self_custom_q_init_cache = deepcopy ( q ) self . _q_build_fn = lambda * args , ** kwargs : self_custom_q_init_cache self . _trained_on = None elif isinstance ( q , str ) or isinstance ( q , Callable ): if isinstance ( q , str ): self . _q_build_fn = get_flow_builder ( q ) else : self . _q_build_fn = q q = self . _q_build_fn ( self . _prior . event_shape , self . link_transform , device = self . _device , ) make_object_deepcopy_compatible ( q ) self . _trained_on = None elif isinstance ( q , VIPosterior ): self . _q_build_fn = q . _q_build_fn self . _trained_on = q . _trained_on self . vi_method = q . vi_method # type: ignore self . _device = q . _device self . _prior = q . _prior self . _x = q . _x self . _q_arg = q . _q_arg make_object_deepcopy_compatible ( q . q ) q = deepcopy ( q . q ) move_all_tensor_to_device ( q , self . 
_device ) assert isinstance ( q , Distribution ), \"\"\"Something went wrong when initializing the variational distribution. Please create an issue on github https://github.com/mackelab/sbi/issues\"\"\" check_variational_distribution ( q , self . _prior ) self . _q = q set_vi_method ( self , method ) \u00b6 Sets variational inference method. Parameters: Name Type Description Default method str One of [rKL, fKL, IW, alpha]. required Returns: Type Description VIPosterior VIPosterior for chainable calls. Source code in sbi/inference/posteriors/vi_posterior.py def set_vi_method ( self , method : str ) -> \"VIPosterior\" : \"\"\"Sets variational inference method. Args: method: One of [rKL, fKL, IW, alpha]. Returns: `VIPosterior` for chainable calls. \"\"\" self . _vi_method = method self . _optimizer_builder = get_VI_method ( method ) return self train ( self , x = None , n_particles = 256 , learning_rate = 0.001 , gamma = 0.999 , max_num_iters = 2000 , min_num_iters = 10 , clip_value = 10.0 , warm_up_rounds = 100 , retrain_from_scratch = False , reset_optimizer = False , show_progress_bar = True , check_for_convergence = True , quality_control = True , quality_control_metric = 'psis' , ** kwargs ) \u00b6 This method trains the variational posterior. Parameters: Name Type Description Default x Optional[Tensor] The observation. None n_particles int Number of samples to approximate expectations within the variational bounds. The larger the more accurate are gradient estimates, but the computational cost per iteration increases. 256 learning_rate float Learning rate of the optimizer. 0.001 gamma float Learning rate decay per iteration. We use an exponential decay scheduler. 0.999 max_num_iters int Maximum number of iterations. 2000 min_num_iters int Minimum number of iterations. 10 clip_value float Gradient clipping value, decreasing may help if you see invalid values. 10.0 warm_up_rounds int Initialize the posterior as the prior. 100 retrain_from_scratch bool Retrain the variational distributions from scratch. False reset_optimizer bool Reset the divergence optimizer False show_progress_bar bool If any progress report should be displayed. True quality_control bool If False quality control is skipped. True quality_control_metric str Which metric to use for evaluating the quality. 'psis' kwargs Hyperparameters check corresponding DivergenceOptimizer for detail eps: Determines sensitivity of convergence check. retain_graph: Boolean which decides whether to retain the computation graph. This may be required for some exotic user-specified q\u2019s. optimizer: A PyTorch Optimizer class e.g. Adam or SGD. See DivergenceOptimizer for details. scheduler: A PyTorch learning rate scheduler. See DivergenceOptimizer for details. alpha: Only used if vi_method= alpha . Determines the alpha divergence. K: Only used if vi_method= IW . Determines the number of importance weighted particles. stick_the_landing: If one should use the STL estimator (only for rKL, IW, alpha). dreg: If one should use the DREG estimator (only for rKL, IW, alpha). weight_transform: Callable applied to importance weights (only for fKL) {} Returns: Type Description VIPosterior VIPosterior (can be used to chain calls). 
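An end-to-end sketch of the `VIPosterior` workflow documented above: construction, `.train()`, `.evaluate()`, and sampling. All names (`potential_fn`, `prior`, `x_o`) are assumptions from an earlier SNLE/SNRE step, and the hyperparameters are illustrative only:

from sbi.inference import VIPosterior  # assumed top-level re-export

posterior = VIPosterior(potential_fn, prior=prior, q="nsf", vi_method="fKL")
posterior.set_default_x(x_o)

# Fit the variational distribution q(theta) to the potential.
posterior.train(
    learning_rate=1e-3,
    max_num_iters=1_000,
    quality_control=False,   # run the quality check explicitly below
)
posterior.evaluate(quality_control_metric="psis")

# Sampling and (normalized) log-probabilities are now cheap.
samples = posterior.sample((1_000,))
log_probs = posterior.log_prob(samples)

Because `.set_default_x()` and `.train()` return the posterior object, these calls can also be chained.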
Source code in sbi/inference/posteriors/vi_posterior.py def train ( self , x : Optional [ TorchTensor ] = None , n_particles : int = 256 , learning_rate : float = 1e-3 , gamma : float = 0.999 , max_num_iters : int = 2000 , min_num_iters : int = 10 , clip_value : float = 10.0 , warm_up_rounds : int = 100 , retrain_from_scratch : bool = False , reset_optimizer : bool = False , show_progress_bar : bool = True , check_for_convergence : bool = True , quality_control : bool = True , quality_control_metric : str = \"psis\" , ** kwargs , ) -> \"VIPosterior\" : \"\"\"This method trains the variational posterior. Args: x: The observation. n_particles: Number of samples to approximate expectations within the variational bounds. The larger the more accurate are gradient estimates, but the computational cost per iteration increases. learning_rate: Learning rate of the optimizer. gamma: Learning rate decay per iteration. We use an exponential decay scheduler. max_num_iters: Maximum number of iterations. min_num_iters: Minimum number of iterations. clip_value: Gradient clipping value, decreasing may help if you see invalid values. warm_up_rounds: Initialize the posterior as the prior. retrain_from_scratch: Retrain the variational distributions from scratch. reset_optimizer: Reset the divergence optimizer show_progress_bar: If any progress report should be displayed. quality_control: If False quality control is skipped. quality_control_metric: Which metric to use for evaluating the quality. kwargs: Hyperparameters check corresponding `DivergenceOptimizer` for detail eps: Determines sensitivity of convergence check. retain_graph: Boolean which decides whether to retain the computation graph. This may be required for some `exotic` user-specified q's. optimizer: A PyTorch Optimizer class e.g. Adam or SGD. See `DivergenceOptimizer` for details. scheduler: A PyTorch learning rate scheduler. See `DivergenceOptimizer` for details. alpha: Only used if vi_method=`alpha`. Determines the alpha divergence. K: Only used if vi_method=`IW`. Determines the number of importance weighted particles. stick_the_landing: If one should use the STL estimator (only for rKL, IW, alpha). dreg: If one should use the DREG estimator (only for rKL, IW, alpha). weight_transform: Callable applied to importance weights (only for fKL) Returns: VIPosterior: `VIPosterior` (can be used to chain calls). \"\"\" # Update optimizer with current arguments. if self . _optimizer is not None : self . _optimizer . update ({ ** locals (), ** kwargs }) # Init q and the optimizer if necessary if retrain_from_scratch : self . q = self . _q_build_fn () # type: ignore self . _optimizer = self . _optimizer_builder ( self . potential_fn , self . q , lr = learning_rate , clip_value = clip_value , gamma = gamma , n_particles = n_particles , prior = self . _prior , ** kwargs , ) if ( reset_optimizer or self . _optimizer is None or not isinstance ( self . _optimizer , self . _optimizer_builder ) ): self . _optimizer = self . _optimizer_builder ( self . potential_fn , self . q , lr = learning_rate , clip_value = clip_value , gamma = gamma , n_particles = n_particles , prior = self . _prior , ** kwargs , ) # Check context x = atleast_2d_float32_tensor ( self . _x_else_default_x ( x )) . to ( # type: ignore self . _device ) already_trained = self . _trained_on is not None and ( x == self . _trained_on ) . all () # Optimize optimizer = self . _optimizer optimizer . to ( self . _device ) optimizer . 
reset_loss_stats () if show_progress_bar : iters = tqdm ( range ( max_num_iters )) else : iters = range ( max_num_iters ) # Warmup before training if reset_optimizer or ( not optimizer . warm_up_was_done and not already_trained ): if show_progress_bar : iters . set_description ( # type: ignore \"Warmup phase, this may take a few seconds...\" ) optimizer . warm_up ( warm_up_rounds ) for i in iters : optimizer . step ( x ) mean_loss , std_loss = optimizer . get_loss_stats () # Update progress bar if show_progress_bar : assert isinstance ( iters , tqdm ) iters . set_description ( # type: ignore f \"Loss: { np . round ( float ( mean_loss ), 2 ) } \" f \"Std: { np . round ( float ( std_loss ), 2 ) } \" ) # Check for convergence if check_for_convergence and i > min_num_iters : if optimizer . converged (): if show_progress_bar : print ( f \" \\n Converged with loss: { np . round ( float ( mean_loss ), 2 ) } \" ) break # Training finished: self . _trained_on = x # Evaluate quality if quality_control : try : self . evaluate ( quality_control_metric = quality_control_metric ) except Exception as e : print ( f \"Quality control did not work, we reset the variational \\ posterior,please check your setting. \\ \\n Following error occured { e } \" ) self . train ( learning_rate = learning_rate * 0.1 , retrain_from_scratch = True , reset_optimizer = True , ) return self Models \u00b6 sbi . utils . get_nn_models . posterior_nn ( model , z_score_theta = 'independent' , z_score_x = 'independent' , hidden_features = 50 , num_transforms = 5 , num_bins = 10 , embedding_net = Identity (), num_components = 10 , ** kwargs ) \u00b6 Returns a function that builds a density estimator for learning the posterior. This function will usually be used for SNPE. The returned function is to be passed to the inference class when using the flexible interface. Parameters: Name Type Description Default model str The type of density estimator that will be created. One of [ mdn , made , maf , maf_rqs , nsf ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 num_transforms int Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a maf or a nsf ). Ignored if density estimator is a mdn or made . 5 num_bins int Number of bins used for the splines in nsf . Ignored if density estimator not nsf . 10 embedding_net Module Optional embedding network for simulation outputs \\(x\\) . This embedding net allows to learn features from potentially high-dimensional simulation outputs. Identity() num_components int Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. 10 kwargs additional custom arguments passed to downstream build functions. 
{} Source code in sbi/utils/get_nn_models.py def posterior_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , num_transforms : int = 5 , num_bins : int = 10 , embedding_net : nn . Module = nn . Identity (), num_components : int = 10 , ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a density estimator for learning the posterior. This function will usually be used for SNPE. The returned function is to be passed to the inference class when using the flexible interface. Args: model: The type of density estimator that will be created. One of [`mdn`, `made`, `maf`, `maf_rqs`, `nsf`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. num_transforms: Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a `maf` or a `nsf`). Ignored if density estimator is a `mdn` or `made`. num_bins: Number of bins used for the splines in `nsf`. Ignored if density estimator not `nsf`. embedding_net: Optional embedding network for simulation outputs $x$. This embedding net allows to learn features from potentially high-dimensional simulation outputs. num_components: Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"num_transforms\" , \"num_bins\" , \"embedding_net\" , \"num_components\" , ), ( z_score_theta , z_score_x , hidden_features , num_transforms , num_bins , embedding_net , num_components , ), ), ** kwargs , ) def build_fn_snpe_a ( batch_theta , batch_x , num_components ): \"\"\"Build function for SNPE-A Extract the number of components from the kwargs, such that they are exposed as a kwargs, offering the possibility to later override this kwarg with `functools.partial`. This is necessary in order to make sure that the MDN in SNPE-A only has one component when running the Algorithm 1 part. \"\"\" return build_mdn ( batch_x = batch_theta , batch_y = batch_x , num_components = num_components , ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"mdn\" : return build_mdn ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"made\" : return build_made ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"maf\" : return build_maf ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"maf_rqs\" : return build_maf_rqs ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"nsf\" : return build_nsf ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) else : raise NotImplementedError if model == \"mdn_snpe_a\" : if num_components != 10 : raise ValueError ( \"You set `num_components`. For SNPE-A, this has to be done at \" \"instantiation of the inference object, i.e. 
\" \"`inference = SNPE_A(..., num_components=20)`\" ) kwargs . pop ( \"num_components\" ) return build_fn_snpe_a if model == \"mdn_snpe_a\" else build_fn sbi . utils . get_nn_models . likelihood_nn ( model , z_score_theta = 'independent' , z_score_x = 'independent' , hidden_features = 50 , num_transforms = 5 , num_bins = 10 , embedding_net = Identity (), num_components = 10 , ** kwargs ) \u00b6 Returns a function that builds a density estimator for learning the likelihood. This function will usually be used for SNLE. The returned function is to be passed to the inference class when using the flexible interface. Parameters: Name Type Description Default model str The type of density estimator that will be created. One of [ mdn , made , maf , maf_rqs , nsf ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 num_transforms int Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a maf or a nsf ). Ignored if density estimator is a mdn or made . 5 num_bins int Number of bins used for the splines in nsf . Ignored if density estimator not nsf . 10 embedding_net Module Optional embedding network for parameters \\(\\theta\\) . Identity() num_components int Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. 10 kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def likelihood_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , num_transforms : int = 5 , num_bins : int = 10 , embedding_net : nn . Module = nn . Identity (), num_components : int = 10 , ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a density estimator for learning the likelihood. This function will usually be used for SNLE. The returned function is to be passed to the inference class when using the flexible interface. Args: model: The type of density estimator that will be created. One of [`mdn`, `made`, `maf`, `maf_rqs`, `nsf`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. num_transforms: Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a `maf` or a `nsf`). Ignored if density estimator is a `mdn` or `made`. 
num_bins: Number of bins used for the splines in `nsf`. Ignored if density estimator not `nsf`. embedding_net: Optional embedding network for parameters $\\theta$. num_components: Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"num_transforms\" , \"num_bins\" , \"embedding_net\" , \"num_components\" , ), ( z_score_x , z_score_theta , hidden_features , num_transforms , num_bins , embedding_net , num_components , ), ), ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"mdn\" : return build_mdn ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"made\" : return build_made ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"maf\" : return build_maf ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"maf_rqs\" : return build_maf_rqs ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"nsf\" : return build_nsf ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"mnle\" : return build_mnle ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) else : raise NotImplementedError return build_fn sbi . utils . get_nn_models . classifier_nn ( model , z_score_theta = 'independent' , z_score_x = 'independent' , hidden_features = 50 , embedding_net_theta = Identity (), embedding_net_x = Identity (), ** kwargs ) \u00b6 Returns a function that builds a classifier for learning density ratios. This function will usually be used for SNRE. The returned function is to be passed to the inference class when using the flexible interface. Note that in the view of the SNRE classifier we build below, x=theta and y=x. Parameters: Name Type Description Default model str The type of classifier that will be created. One of [ linear , mlp , resnet ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 embedding_net_theta Module Optional embedding network for parameters \\(\\theta\\) . Identity() embedding_net_x Module Optional embedding network for simulation outputs \\(x\\) . This embedding net allows to learn features from potentially high-dimensional simulation outputs. Identity() kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def classifier_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , embedding_net_theta : nn . Module = nn . Identity (), embedding_net_x : nn . Module = nn . Identity (), ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a classifier for learning density ratios. This function will usually be used for SNRE. 
The returned function is to be passed to the inference class when using the flexible interface. Note that in the view of the SNRE classifier we build below, x=theta and y=x. Args: model: The type of classifier that will be created. One of [`linear`, `mlp`, `resnet`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. embedding_net_theta: Optional embedding network for parameters $\\theta$. embedding_net_x: Optional embedding network for simulation outputs $x$. This embedding net allows to learn features from potentially high-dimensional simulation outputs. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"embedding_net_x\" , \"embedding_net_y\" , ), ( z_score_theta , z_score_x , hidden_features , embedding_net_theta , embedding_net_x , ), ), ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"linear\" : return build_linear_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) if model == \"mlp\" : return build_mlp_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) if model == \"resnet\" : return build_resnet_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) else : raise NotImplementedError return build_fn Potentials \u00b6 sbi . inference . potentials . posterior_based_potential . posterior_estimator_based_potential ( posterior_estimator , prior , x_o , enable_transform = True ) \u00b6 Returns the potential for posterior-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. The potential is the same as the log-probability of the posterior_estimator , but it is set to \\(-\\inf\\) outside of the prior bounds. Parameters: Name Type Description Default posterior_estimator Module The neural network modelling the posterior. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the posterior. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/posterior_based_potential.py def posterior_estimator_based_potential ( posterior_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns the potential for posterior-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. The potential is the same as the log-probability of the `posterior_estimator`, but it is set to $-\\inf$ outside of the prior bounds. Args: posterior_estimator: The neural network modelling the posterior. prior: The prior distribution. 
x_o: The observed data at which to evaluate the posterior. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( posterior_estimator . parameters ()) . device ) potential_fn = PosteriorBasedPotential ( posterior_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform sbi . inference . potentials . likelihood_based_potential . likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o , enable_transform = True ) \u00b6 Returns potential \\(\\log(p(x_o|\\theta)p(\\theta))\\) for likelihood-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Parameters: Name Type Description Default likelihood_estimator Module The neural network modelling the likelihood. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the likelihood. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function \\(p(x_o|\\theta)p(\\theta)\\) and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/likelihood_based_potential.py def likelihood_estimator_based_potential ( likelihood_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns potential $\\log(p(x_o|\\theta)p(\\theta))$ for likelihood-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Args: likelihood_estimator: The neural network modelling the likelihood. prior: The prior distribution. x_o: The observed data at which to evaluate the likelihood. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function $p(x_o|\\theta)p(\\theta)$ and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( likelihood_estimator . parameters ()) . device ) potential_fn = LikelihoodBasedPotential ( likelihood_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform sbi . inference . potentials . ratio_based_potential . ratio_estimator_based_potential ( ratio_estimator , prior , x_o , enable_transform = True ) \u00b6 Returns the potential for ratio-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Parameters: Name Type Description Default ratio_estimator Module The neural network modelling likelihood-to-evidence ratio. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the likelihood-to-evidence ratio. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . 
True Returns: Type Description Tuple[Callable, torch Transform] The potential function and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/ratio_based_potential.py def ratio_estimator_based_potential ( ratio_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns the potential for ratio-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Args: ratio_estimator: The neural network modelling likelihood-to-evidence ratio. prior: The prior distribution. x_o: The observed data at which to evaluate the likelihood-to-evidence ratio. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( ratio_estimator . parameters ()) . device ) potential_fn = RatioBasedPotential ( ratio_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform Analysis \u00b6 sbi . analysis . plot . pairplot ( samples , points = None , limits = None , subset = None , offdiag = 'hist' , diag = 'hist' , figsize = ( 10 , 10 ), labels = None , ticks = [], upper = None , fig = None , axes = None , ** kwargs ) \u00b6 Plot samples in a 2D grid showing marginals and pairwise marginals. Each of the diagonal plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Each upper-diagonal plot can be interpreted as a 2D-marginal of the distribution. Parameters: Name Type Description Default samples Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Samples used to build the histogram. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] List of additional points to scatter. None limits Union[List, torch.Tensor] Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on). None offdiag Union[str, List[str]] Plotting style for upper diagonal, {hist, scatter, contour, cond, None}. 'hist' upper Optional[str] deprecated, use offdiag instead. None diag Union[str, List[str]] Plotting style for diagonal, {hist, cond, None}. 'hist' figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def pairplot ( samples : Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . 
Tensor ] ] = None , limits : Optional [ Union [ List , torch . Tensor ]] = None , subset : Optional [ List [ int ]] = None , offdiag : Optional [ Union [ List [ str ], str ]] = \"hist\" , diag : Optional [ Union [ List [ str ], str ]] = \"hist\" , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], upper : Optional [ str ] = None , fig = None , axes = None , ** kwargs , ): \"\"\" Plot samples in a 2D grid showing marginals and pairwise marginals. Each of the diagonal plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Each upper-diagonal plot can be interpreted as a 2D-marginal of the distribution. Args: samples: Samples used to build the histogram. points: List of additional points to scatter. limits: Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on). offdiag: Plotting style for upper diagonal, {hist, scatter, contour, cond, None}. upper: deprecated, use offdiag instead. diag: Plotting style for diagonal, {hist, cond, None}. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" # TODO: add color map support # TODO: automatically determine good bin sizes for histograms # TODO: add legend (if legend is True) opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) samples , dim , limits = prepare_for_plot ( samples , limits ) # checks. if opts [ \"legend\" ]: assert len ( opts [ \"samples_labels\" ]) >= len ( samples ), \"Provide at least as many labels as samples.\" if opts [ \"upper\" ] is not None : warn ( \"upper is deprecated, use offdiag instead.\" ) opts [ \"offdiag\" ] = opts [ \"upper\" ] # Prepare diag/upper/lower if type ( opts [ \"diag\" ]) is not list : opts [ \"diag\" ] = [ opts [ \"diag\" ] for _ in range ( len ( samples ))] if type ( opts [ \"offdiag\" ]) is not list : opts [ \"offdiag\" ] = [ opts [ \"offdiag\" ] for _ in range ( len ( samples ))] # if type(opts['lower']) is not list: # opts['lower'] = [opts['lower'] for _ in range(len(samples))] opts [ \"lower\" ] = None diag_func = get_diag_func ( samples , limits , opts , ** kwargs ) def offdiag_func ( row , col , limits , ** kwargs ): if len ( samples ) > 0 : for n , v in enumerate ( samples ): if opts [ \"offdiag\" ][ n ] == \"hist\" or opts [ \"offdiag\" ][ n ] == \"hist2d\" : hist , xedges , yedges = np . histogram2d ( v [:, col ], v [:, row ], range = [ [ limits [ col ][ 0 ], limits [ col ][ 1 ]], [ limits [ row ][ 0 ], limits [ row ][ 1 ]], ], ** opts [ \"hist_offdiag\" ], ) plt . imshow ( hist . 
T , origin = \"lower\" , extent = ( xedges [ 0 ], xedges [ - 1 ], yedges [ 0 ], yedges [ - 1 ], ), aspect = \"auto\" , ) elif opts [ \"offdiag\" ][ n ] in [ \"kde\" , \"kde2d\" , \"contour\" , \"contourf\" , ]: density = gaussian_kde ( v [:, [ col , row ]] . T , bw_method = opts [ \"kde_offdiag\" ][ \"bw_method\" ], ) X , Y = np . meshgrid ( np . linspace ( limits [ col ][ 0 ], limits [ col ][ 1 ], opts [ \"kde_offdiag\" ][ \"bins\" ], ), np . linspace ( limits [ row ][ 0 ], limits [ row ][ 1 ], opts [ \"kde_offdiag\" ][ \"bins\" ], ), ) positions = np . vstack ([ X . ravel (), Y . ravel ()]) Z = np . reshape ( density ( positions ) . T , X . shape ) if opts [ \"offdiag\" ][ n ] == \"kde\" or opts [ \"offdiag\" ][ n ] == \"kde2d\" : plt . imshow ( Z , extent = ( limits [ col ][ 0 ], limits [ col ][ 1 ], limits [ row ][ 0 ], limits [ row ][ 1 ], ), origin = \"lower\" , aspect = \"auto\" , ) elif opts [ \"offdiag\" ][ n ] == \"contour\" : if opts [ \"contour_offdiag\" ][ \"percentile\" ]: Z = probs2contours ( Z , opts [ \"contour_offdiag\" ][ \"levels\" ]) else : Z = ( Z - Z . min ()) / ( Z . max () - Z . min ()) plt . contour ( X , Y , Z , origin = \"lower\" , extent = [ limits [ col ][ 0 ], limits [ col ][ 1 ], limits [ row ][ 0 ], limits [ row ][ 1 ], ], colors = opts [ \"samples_colors\" ][ n ], levels = opts [ \"contour_offdiag\" ][ \"levels\" ], ) else : pass elif opts [ \"offdiag\" ][ n ] == \"scatter\" : plt . scatter ( v [:, col ], v [:, row ], color = opts [ \"samples_colors\" ][ n ], ** opts [ \"scatter_offdiag\" ], ) elif opts [ \"offdiag\" ][ n ] == \"plot\" : plt . plot ( v [:, col ], v [:, row ], color = opts [ \"samples_colors\" ][ n ], ** opts [ \"plot_offdiag\" ], ) else : pass return _arrange_plots ( diag_func , offdiag_func , dim , limits , points , opts , fig = fig , axes = axes ) sbi . analysis . plot . marginal_plot ( samples , points = None , limits = None , subset = None , diag = 'hist' , figsize = ( 10 , 10 ), labels = None , ticks = [], fig = None , axes = None , ** kwargs ) \u00b6 Plot samples in a row showing 1D marginals of selected dimensions. Each of the plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Parameters: Name Type Description Default samples Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Samples used to build the histogram. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] List of additional points to scatter. None limits Union[List, torch.Tensor] Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on). None diag Optional[str] Plotting style for 1D marginals, {hist, kde cond, None}. 'hist' figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] points_colors Colors of the points . required fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. 
{} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def marginal_plot ( samples : Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , limits : Optional [ Union [ List , torch . Tensor ]] = None , subset : Optional [ List [ int ]] = None , diag : Optional [ str ] = \"hist\" , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], fig = None , axes = None , ** kwargs , ): \"\"\" Plot samples in a row showing 1D marginals of selected dimensions. Each of the plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Args: samples: Samples used to build the histogram. points: List of additional points to scatter. limits: Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on). diag: Plotting style for 1D marginals, {hist, kde cond, None}. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. points_colors: Colors of the `points`. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) samples , dim , limits = prepare_for_plot ( samples , limits ) # Prepare diag/upper/lower if type ( opts [ \"diag\" ]) is not list : opts [ \"diag\" ] = [ opts [ \"diag\" ] for _ in range ( len ( samples ))] diag_func = get_diag_func ( samples , limits , opts , ** kwargs ) return _arrange_plots ( diag_func , None , dim , limits , points , opts , fig = fig , axes = axes ) sbi . analysis . plot . conditional_pairplot ( density , condition , limits , points = None , subset = None , resolution = 50 , figsize = ( 10 , 10 ), labels = None , ticks = [], fig = None , axes = None , ** kwargs ) \u00b6 Plot conditional distribution given all other parameters. The conditionals can be interpreted as slices through the density at a location given by condition . For example: Say we have a 3D density with parameters \\(\\theta_0\\) , \\(\\theta_1\\) , \\(\\theta_2\\) and a condition \\(c\\) passed by the user in the condition argument. For the plot of \\(\\theta_0\\) on the diagonal, this will plot the conditional \\(p(\\theta_0 | \\theta_1=c[1], \\theta_2=c[2])\\) . For the upper diagonal of \\(\\theta_1\\) and \\(\\theta_2\\) , it will plot \\(p(\\theta_1, \\theta_2 | \\theta_0=c[0])\\) . All other diagonals and upper-diagonals are built in the corresponding way. Parameters: Name Type Description Default density Any Probability density with a log_prob() method. required condition Tensor Condition that all but the one/two regarded parameters are fixed to. The condition should be of shape (1, dim_theta), i.e. it could e.g. 
be a sample from the posterior distribution. required limits Union[List, torch.Tensor] Limits in between which each parameter will be evaluated. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Additional points to scatter. None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on) None resolution int Resolution of the grid at which we evaluate the pdf . 50 figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] points_colors Colors of the points . required fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def conditional_pairplot ( density : Any , condition : torch . Tensor , limits : Union [ List , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , subset : Optional [ List [ int ]] = None , resolution : int = 50 , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], fig = None , axes = None , ** kwargs , ): r \"\"\" Plot conditional distribution given all other parameters. The conditionals can be interpreted as slices through the `density` at a location given by `condition`. For example: Say we have a 3D density with parameters $\\theta_0$, $\\theta_1$, $\\theta_2$ and a condition $c$ passed by the user in the `condition` argument. For the plot of $\\theta_0$ on the diagonal, this will plot the conditional $p(\\theta_0 | \\theta_1=c[1], \\theta_2=c[2])$. For the upper diagonal of $\\theta_1$ and $\\theta_2$, it will plot $p(\\theta_1, \\theta_2 | \\theta_0=c[0])$. All other diagonals and upper-diagonals are built in the corresponding way. Args: density: Probability density with a `log_prob()` method. condition: Condition that all but the one/two regarded parameters are fixed to. The condition should be of shape (1, dim_theta), i.e. it could e.g. be a sample from the posterior distribution. limits: Limits in between which each parameter will be evaluated. points: Additional points to scatter. subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on) resolution: Resolution of the grid at which we evaluate the `pdf`. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. points_colors: Colors of the `points`. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" device = density . 
_device if hasattr ( density , \"_device\" ) else \"cpu\" # Setting these is required because _pairplot_scaffold will check if opts['diag'] is # `None`. This would break if opts has no key 'diag'. Same for 'upper'. diag = \"cond\" offdiag = \"cond\" opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) opts [ \"lower\" ] = None dim , limits , eps_margins = prepare_for_conditional_plot ( condition , opts ) diag_func = get_conditional_diag_func ( opts , limits , eps_margins , resolution ) def offdiag_func ( row , col , ** kwargs ): p_image = ( eval_conditional_density ( opts [ \"density\" ], opts [ \"condition\" ] . to ( device ), limits . to ( device ), row , col , resolution = resolution , eps_margins1 = eps_margins [ row ], eps_margins2 = eps_margins [ col ], ) . to ( \"cpu\" ) . numpy () ) plt . imshow ( p_image . T , origin = \"lower\" , extent = ( limits [ col , 0 ] . item (), limits [ col , 1 ] . item (), limits [ row , 0 ] . item (), limits [ row , 1 ] . item (), ), aspect = \"auto\" , ) return _arrange_plots ( diag_func , offdiag_func , dim , limits , points , opts , fig = fig , axes = axes ) sbi . analysis . conditional_density . conditional_corrcoeff ( density , limits , condition , subset = None , resolution = 50 ) \u00b6 Returns the conditional correlation matrix of a distribution. To compute the conditional distribution, we condition all but two parameters to values from condition , and then compute the Pearson correlation coefficient \\(\\rho\\) between the remaining two parameters under the distribution density . We do so for any pair of parameters specified in subset , thus creating a matrix containing conditional correlations between any pair of parameters. If condition is a batch of conditions, this function computes the conditional correlation matrix for each one of them and returns the mean. Parameters: Name Type Description Default density Any Probability density function with .log_prob() function. required limits Tensor Limits within which to evaluate the density . required condition Tensor Values to condition the density on. If a batch of conditions is passed, we compute the conditional correlation matrix for each of them and return the average conditional correlation matrix. required subset Optional[List[int]] Evaluate the conditional distribution only on a subset of dimensions. If None this function uses all dimensions. None resolution int Number of grid points on which the conditional distribution is evaluated. A higher value increases the accuracy of the estimated correlation but also increases the computational cost. 50 Returns: Average conditional correlation matrix of shape either (num_dim, num_dim) or (len(subset), len(subset)) if subset was specified. Source code in sbi/analysis/conditional_density.py def conditional_corrcoeff ( density : Any , limits : Tensor , condition : Tensor , subset : Optional [ List [ int ]] = None , resolution : int = 50 , ) -> Tensor : r \"\"\"Returns the conditional correlation matrix of a distribution. To compute the conditional distribution, we condition all but two parameters to values from `condition`, and then compute the Pearson correlation coefficient $\\rho$ between the remaining two parameters under the distribution `density`. We do so for any pair of parameters specified in `subset`, thus creating a matrix containing conditional correlations between any pair of parameters. 
If `condition` is a batch of conditions, this function computes the conditional correlation matrix for each one of them and returns the mean. Args: density: Probability density function with `.log_prob()` function. limits: Limits within which to evaluate the `density`. condition: Values to condition the `density` on. If a batch of conditions is passed, we compute the conditional correlation matrix for each of them and return the average conditional correlation matrix. subset: Evaluate the conditional distribution only on a subset of dimensions. If `None` this function uses all dimensions. resolution: Number of grid points on which the conditional distribution is evaluated. A higher value increases the accuracy of the estimated correlation but also increases the computational cost. Returns: Average conditional correlation matrix of shape either `(num_dim, num_dim)` or `(len(subset), len(subset))` if `subset` was specified. \"\"\" device = density . _device if hasattr ( density , \"_device\" ) else \"cpu\" subset_ = subset if subset is not None else range ( condition . shape [ 1 ]) correlation_matrices = [] for cond in condition : correlation_matrices . append ( torch . stack ( [ compute_corrcoeff ( eval_conditional_density ( density , cond . to ( device ), limits . to ( device ), dim1 = dim1 , dim2 = dim2 , resolution = resolution , ), limits [[ dim1 , dim2 ]] . to ( device ), ) for dim1 in subset_ for dim2 in subset_ if dim1 < dim2 ] ) ) average_correlations = torch . mean ( torch . stack ( correlation_matrices ), dim = 0 ) # `average_correlations` is still a vector containing the upper triangular entries. # Below, assemble them into a matrix: av_correlation_matrix = torch . zeros (( len ( subset_ ), len ( subset_ )), device = device ) triu_indices = torch . triu_indices ( row = len ( subset_ ), col = len ( subset_ ), offset = 1 , device = device ) av_correlation_matrix [ triu_indices [ 0 ], triu_indices [ 1 ]] = average_correlations # Make the matrix symmetric by copying upper diagonal to lower diagonal. av_correlation_matrix = torch . triu ( av_correlation_matrix ) + torch . tril ( av_correlation_matrix . T ) av_correlation_matrix . fill_diagonal_ ( 1.0 ) return av_correlation_matrix","title":"API Reference"},{"location":"reference/#api-reference","text":"","title":"API Reference"},{"location":"reference/#inference","text":"","title":"Inference"},{"location":"reference/#sbi.inference.base.infer","text":"Runs simulation-based inference and returns the posterior. This function provides a simple interface to run sbi. Inference is run for a single round and hence the returned posterior \\(p(\\theta|x)\\) can be sampled and evaluated for any \\(x\\) (i.e. it is amortized). The scope of this function is limited to the most essential features of sbi. For more flexibility (e.g. multi-round inference, different density estimators) please use the flexible interface described here: https://www.mackelab.org/sbi/tutorial/02_flexible_interface/ Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\mathrm{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required prior Distribution A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with .log_prob() and .sample() (for example, a PyTorch distribution) can be used. 
required method str What inference method to use. Either of SNPE, SNLE or SNRE. required num_simulations int Number of simulation calls. More simulations means a longer runtime, but a better posterior estimate. required num_workers int Number of parallel workers to use for simulations. 1 Returns: Posterior over parameters conditional on observations (amortized). Source code in sbi/inference/base.py def infer ( simulator : Callable , prior : Distribution , method : str , num_simulations : int , num_workers : int = 1 , ) -> NeuralPosterior : r \"\"\"Runs simulation-based inference and returns the posterior. This function provides a simple interface to run sbi. Inference is run for a single round and hence the returned posterior $p(\\theta|x)$ can be sampled and evaluated for any $x$ (i.e. it is amortized). The scope of this function is limited to the most essential features of sbi. For more flexibility (e.g. multi-round inference, different density estimators) please use the flexible interface described here: https://www.mackelab.org/sbi/tutorial/02_flexible_interface/ Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\mathrm{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. prior: A probability distribution that expresses prior knowledge about the parameters, e.g. which ranges are meaningful for them. Any object with `.log_prob()`and `.sample()` (for example, a PyTorch distribution) can be used. method: What inference method to use. Either of SNPE, SNLE or SNRE. num_simulations: Number of simulation calls. More simulations means a longer runtime, but a better posterior estimate. num_workers: Number of parallel workers to use for simulations. Returns: Posterior over parameters conditional on observations (amortized). \"\"\" try : method_fun : Callable = getattr ( sbi . inference , method . upper ()) except AttributeError : raise NameError ( \"Method not available. `method` must be one of 'SNPE', 'SNLE', 'SNRE'.\" ) simulator , prior = prepare_for_sbi ( simulator , prior ) inference = method_fun ( prior = prior ) theta , x = simulate_for_sbi ( simulator = simulator , proposal = prior , num_simulations = num_simulations , num_workers = num_workers , ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () return posterior","title":"infer()"},{"location":"reference/#sbi.utils.user_input_checks.prepare_for_sbi","text":"Prepare simulator and prior for usage in sbi. NOTE: This is a wrapper around process_prior and process_simulator which can be used in isolation as well. Attempts to meet the following requirements by reshaping and type-casting: the simulator function receives as input and returns a Tensor. the simulator can simulate batches of parameters and return batches of data. the prior does not produce batches and samples and evaluates to Tensor. the output shape is a torch.Size((1,N)) (i.e, has a leading batch dimension 1). If this is not possible, a suitable exception will be raised. Parameters: Name Type Description Default simulator Callable Simulator as provided by the user. required prior Prior as provided by the user. required Returns: Type Description Tuple[Callable, torch.distributions.distribution.Distribution] Tuple (simulator, prior) checked and matching the requirements of sbi. 
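As a concrete illustration of this wrapper, the following hypothetical sketch runs `prepare_for_sbi` on a toy simulator and prior and then follows the same steps as `infer` above via the flexible interface. The toy simulator, the `BoxUniform` prior, and the simulation budget are assumptions made only for this example.

```python
# Hypothetical sketch: check a toy simulator/prior pair, then run single-round SNPE.
import torch
from sbi.inference import SNPE, simulate_for_sbi
from sbi.utils import BoxUniform
from sbi.utils.user_input_checks import prepare_for_sbi

def simulator(theta):
    # Toy simulator: parameters plus Gaussian observation noise.
    return theta + 0.1 * torch.randn_like(theta)

prior = BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))

# Reshape and type-cast simulator and prior so they meet sbi's requirements.
simulator, prior = prepare_for_sbi(simulator, prior)

theta, x = simulate_for_sbi(simulator, proposal=prior, num_simulations=500)
inference = SNPE(prior=prior)
_ = inference.append_simulations(theta, x).train()
posterior = inference.build_posterior()
```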
Source code in sbi/utils/user_input_checks.py def prepare_for_sbi ( simulator : Callable , prior ) -> Tuple [ Callable , Distribution ]: \"\"\"Prepare simulator and prior for usage in sbi. NOTE: This is a wrapper around `process_prior` and `process_simulator` which can be used in isolation as well. Attempts to meet the following requirements by reshaping and type-casting: - the simulator function receives as input and returns a Tensor.
    - the simulator can simulate batches of parameters and return batches of data.
    - the prior does not produce batches, and its samples and log-prob evaluations are Tensors.
    - the output shape is a `torch.Size((1,N))` (i.e, has a leading batch dimension 1). If this is not possible, a suitable exception will be raised. Args: simulator: Simulator as provided by the user. prior: Prior as provided by the user. Returns: Tuple (simulator, prior) checked and matching the requirements of sbi. \"\"\" # Check prior, return PyTorch prior. prior , _ , prior_returns_numpy = process_prior ( prior ) # Check simulator, returns PyTorch simulator able to simulate batches. simulator = process_simulator ( simulator , prior , prior_returns_numpy ) # Consistency check after making ready for sbi. check_sbi_inputs ( simulator , prior ) return simulator , prior","title":"prepare_for_sbi()"},{"location":"reference/#sbi.inference.base.simulate_for_sbi","text":"Returns ( \\(\\theta, x\\) ) pairs obtained from sampling the proposal and simulating. This function performs two steps: Sample parameters \\(\\theta\\) from the proposal . Simulate these parameters to obtain \\(x\\) . Parameters: Name Type Description Default simulator Callable A function that takes parameters \\(\\theta\\) and maps them to simulations, or observations, x , \\(\\text{sim}(\\theta)\\to x\\) . Any regular Python callable (i.e. function or class with __call__ method) can be used. required proposal Any Probability distribution that the parameters \\(\\theta\\) are sampled from. required num_simulations int Number of simulations that are run. required num_workers int Number of parallel workers to use for simulations. 1 simulation_batch_size int Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). 1 seed Optional[int] Seed for reproducibility. None show_progress_bar bool Whether to show a progress bar for simulating. This will not affect whether there will be a progressbar while drawing samples from the proposal. True Returns: Sampled parameters \\(\\theta\\) and simulation-outputs \\(x\\) . Source code in sbi/inference/base.py def simulate_for_sbi ( simulator : Callable , proposal : Any , num_simulations : int , num_workers : int = 1 , simulation_batch_size : int = 1 , seed : Optional [ int ] = None , show_progress_bar : bool = True , ) -> Tuple [ Tensor , Tensor ]: r \"\"\"Returns ($\\theta, x$) pairs obtained from sampling the proposal and simulating. This function performs two steps: - Sample parameters $\\theta$ from the `proposal`. - Simulate these parameters to obtain $x$. Args: simulator: A function that takes parameters $\\theta$ and maps them to simulations, or observations, `x`, $\\text{sim}(\\theta)\\to x$. Any regular Python callable (i.e. function or class with `__call__` method) can be used. proposal: Probability distribution that the parameters $\\theta$ are sampled from. num_simulations: Number of simulations that are run. num_workers: Number of parallel workers to use for simulations. simulation_batch_size: Number of parameter sets that the simulator maps to data x at once. If None, we simulate all parameter sets at the same time. If >= 1, the simulator has to process data of shape (simulation_batch_size, parameter_dimension). seed: Seed for reproducibility. show_progress_bar: Whether to show a progress bar for simulating. This will not affect whether there will be a progressbar while drawing samples from the proposal. Returns: Sampled parameters $\\theta$ and simulation-outputs $x$. \"\"\" theta = proposal . 
sample (( num_simulations ,)) x = simulate_in_batches ( simulator = simulator , theta = theta , sim_batch_size = simulation_batch_size , num_workers = num_workers , seed = seed , show_progress_bars = show_progress_bar , ) return theta , x","title":"simulate_for_sbi()"},{"location":"reference/#sbi.inference.snpe.snpe_a.SNPE_A","text":"","title":"SNPE_A"},{"location":"reference/#sbi.inference.snpe.snpe_c.SNPE_C","text":"","title":"SNPE_C"},{"location":"reference/#sbi.inference.snle.snle_a.SNLE_A","text":"","title":"SNLE_A"},{"location":"reference/#sbi.inference.snre.snre_a.SNRE_A","text":"","title":"SNRE_A"},{"location":"reference/#sbi.inference.snre.snre_b.SNRE_B","text":"","title":"SNRE_B"},{"location":"reference/#sbi.inference.snre.snre_c.SNRE_C","text":"","title":"SNRE_C"},{"location":"reference/#sbi.inference.snre.bnre.BNRE","text":"","title":"BNRE"},{"location":"reference/#sbi.inference.abc.mcabc.MCABC","text":"","title":"MCABC"},{"location":"reference/#sbi.inference.abc.smcabc.SMCABC","text":"","title":"SMCABC"},{"location":"reference/#posteriors","text":"","title":"Posteriors"},{"location":"reference/#sbi.inference.posteriors.direct_posterior.DirectPosterior","text":"Posterior \\(p(\\theta|x_o)\\) with log_prob() and sample() methods, only applicable to SNPE. SNPE trains a neural network to directly approximate the posterior distribution. However, for bounded priors, the neural network can have leakage: it puts non-zero mass in regions where the prior is zero. The DirectPosterior class wraps the trained network to deal with these cases. Specifically, this class offers the following functionality: - correct the calculation of the log probability such that it compensates for the leakage. - reject samples that lie outside of the prior bounds. This class can not be used in combination with SNLE or SNRE.","title":"DirectPosterior"},{"location":"reference/#sbi.inference.posteriors.importance_posterior.ImportanceSamplingPosterior","text":"Provides importance sampling to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). ImportanceSamplingPosterior allows to estimate the posterior log-probability by estimating the normlalization constant with importance sampling. It also allows to perform importance sampling (with .sample() ) and to draw approximate samples with sampling-importance-resampling (SIR) (with .sir_sample() )","title":"ImportanceSamplingPosterior"},{"location":"reference/#sbi.inference.posteriors.mcmc_posterior.MCMCPosterior","text":"Provides MCMC to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). MCMCPosterior allows to sample from the posterior with MCMC.","title":"MCMCPosterior"},{"location":"reference/#sbi.inference.posteriors.rejection_posterior.RejectionPosterior","text":"Provides rejection sampling to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). RejectionPosterior allows to sample from the posterior with rejection sampling.","title":"RejectionPosterior"},{"location":"reference/#sbi.inference.posteriors.vi_posterior.VIPosterior","text":"Provides VI (Variational Inference) to sample from the posterior. SNLE or SNRE train neural networks to approximate the likelihood(-ratios). VIPosterior allows to learn a tractable variational posterior \\(q(\\theta)\\) which approximates the true posterior \\(p(\\theta|x_o)\\) . 
After this second training stage, we can produce approximate posterior samples, by just sampling from q with no additional cost. For additional information see [1] and [2]. References: [1] Variational methods for simulation-based inference, Manuel Gl\u00f6ckler, Michael Deistler, Jakob Macke, 2022, https://openreview.net/forum?id=kZ0UYdhqkNY [2] Sequential Neural Posterior and Likelihood Approximation, Samuel Wiqvist, Jes Frellsen, Umberto Picchini, 2021, https://arxiv.org/abs/2102.06522","title":"VIPosterior"},{"location":"reference/#models","text":"","title":"Models"},{"location":"reference/#sbi.utils.get_nn_models.posterior_nn","text":"Returns a function that builds a density estimator for learning the posterior. This function will usually be used for SNPE. The returned function is to be passed to the inference class when using the flexible interface. Parameters: Name Type Description Default model str The type of density estimator that will be created. One of [ mdn , made , maf , maf_rqs , nsf ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 num_transforms int Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a maf or a nsf ). Ignored if density estimator is a mdn or made . 5 num_bins int Number of bins used for the splines in nsf . Ignored if density estimator not nsf . 10 embedding_net Module Optional embedding network for simulation outputs \\(x\\) . This embedding net allows to learn features from potentially high-dimensional simulation outputs. Identity() num_components int Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. 10 kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def posterior_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , num_transforms : int = 5 , num_bins : int = 10 , embedding_net : nn . Module = nn . Identity (), num_components : int = 10 , ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a density estimator for learning the posterior. This function will usually be used for SNPE. The returned function is to be passed to the inference class when using the flexible interface. Args: model: The type of density estimator that will be created. One of [`mdn`, `made`, `maf`, `maf_rqs`, `nsf`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 
z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. num_transforms: Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a `maf` or a `nsf`). Ignored if density estimator is a `mdn` or `made`. num_bins: Number of bins used for the splines in `nsf`. Ignored if density estimator not `nsf`. embedding_net: Optional embedding network for simulation outputs $x$. This embedding net allows to learn features from potentially high-dimensional simulation outputs. num_components: Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"num_transforms\" , \"num_bins\" , \"embedding_net\" , \"num_components\" , ), ( z_score_theta , z_score_x , hidden_features , num_transforms , num_bins , embedding_net , num_components , ), ), ** kwargs , ) def build_fn_snpe_a ( batch_theta , batch_x , num_components ): \"\"\"Build function for SNPE-A Extract the number of components from the kwargs, such that they are exposed as a kwargs, offering the possibility to later override this kwarg with `functools.partial`. This is necessary in order to make sure that the MDN in SNPE-A only has one component when running the Algorithm 1 part. \"\"\" return build_mdn ( batch_x = batch_theta , batch_y = batch_x , num_components = num_components , ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"mdn\" : return build_mdn ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"made\" : return build_made ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"maf\" : return build_maf ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"maf_rqs\" : return build_maf_rqs ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) elif model == \"nsf\" : return build_nsf ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) else : raise NotImplementedError if model == \"mdn_snpe_a\" : if num_components != 10 : raise ValueError ( \"You set `num_components`. For SNPE-A, this has to be done at \" \"instantiation of the inference object, i.e. \" \"`inference = SNPE_A(..., num_components=20)`\" ) kwargs . pop ( \"num_components\" ) return build_fn_snpe_a if model == \"mdn_snpe_a\" else build_fn","title":"posterior_nn()"},{"location":"reference/#sbi.utils.get_nn_models.likelihood_nn","text":"Returns a function that builds a density estimator for learning the likelihood. This function will usually be used for SNLE. The returned function is to be passed to the inference class when using the flexible interface. Parameters: Name Type Description Default model str The type of density estimator that will be created. One of [ mdn , made , maf , maf_rqs , nsf ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 
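A minimal hedged usage sketch for posterior_nn (assuming prior, theta, and x already exist): the returned build function is handed to SNPE, which calls it internally once the data shapes are known.

from sbi.inference import SNPE
from sbi.utils.get_nn_models import posterior_nn

# Build function for a neural spline flow with a custom width and depth.
density_estimator_build_fn = posterior_nn(
    model="nsf", hidden_features=60, num_transforms=3
)
inference = SNPE(prior=prior, density_estimator=density_estimator_build_fn)
posterior_estimator = inference.append_simulations(theta, x).train()
posterior = inference.build_posterior(posterior_estimator)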
'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 num_transforms int Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a maf or a nsf ). Ignored if density estimator is a mdn or made . 5 num_bins int Number of bins used for the splines in nsf . Ignored if density estimator not nsf . 10 embedding_net Module Optional embedding network for parameters \\(\\theta\\) . Identity() num_components int Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. 10 kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def likelihood_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , num_transforms : int = 5 , num_bins : int = 10 , embedding_net : nn . Module = nn . Identity (), num_components : int = 10 , ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a density estimator for learning the likelihood. This function will usually be used for SNLE. The returned function is to be passed to the inference class when using the flexible interface. Args: model: The type of density estimator that will be created. One of [`mdn`, `made`, `maf`, `maf_rqs`, `nsf`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. - `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. num_transforms: Number of transforms when a flow is used. Only relevant if density estimator is a normalizing flow (i.e. currently either a `maf` or a `nsf`). Ignored if density estimator is a `mdn` or `made`. num_bins: Number of bins used for the splines in `nsf`. Ignored if density estimator not `nsf`. embedding_net: Optional embedding network for parameters $\\theta$. num_components: Number of mixture components for a mixture of Gaussians. Ignored if density estimator is not an mdn. kwargs: additional custom arguments passed to downstream build functions. 
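likelihood_nn is used the same way, only together with SNLE (again a hedged sketch with prior, theta, and x assumed to exist):

from sbi.inference import SNLE
from sbi.utils.get_nn_models import likelihood_nn

# Masked autoregressive flow as likelihood estimator; an embedding net for theta could be passed as well.
inference = SNLE(
    prior=prior, density_estimator=likelihood_nn(model="maf", hidden_features=50)
)
likelihood_estimator = inference.append_simulations(theta, x).train()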
\"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"num_transforms\" , \"num_bins\" , \"embedding_net\" , \"num_components\" , ), ( z_score_x , z_score_theta , hidden_features , num_transforms , num_bins , embedding_net , num_components , ), ), ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"mdn\" : return build_mdn ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"made\" : return build_made ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"maf\" : return build_maf ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"maf_rqs\" : return build_maf_rqs ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"nsf\" : return build_nsf ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) elif model == \"mnle\" : return build_mnle ( batch_x = batch_x , batch_y = batch_theta , ** kwargs ) else : raise NotImplementedError return build_fn","title":"likelihood_nn()"},{"location":"reference/#sbi.utils.get_nn_models.classifier_nn","text":"Returns a function that builds a classifier for learning density ratios. This function will usually be used for SNRE. The returned function is to be passed to the inference class when using the flexible interface. Note that in the view of the SNRE classifier we build below, x=theta and y=x. Parameters: Name Type Description Default model str The type of classifier that will be created. One of [ linear , mlp , resnet ]. required z_score_theta Optional[str] Whether to z-score parameters \\(\\theta\\) before passing them into the network, can take one of the following: - none , or None: do not z-score. - independent : z-score each dimension independently. - structured : treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. 'independent' z_score_x Optional[str] Whether to z-score simulation outputs \\(x\\) before passing them into the network, same options as z_score_theta. 'independent' hidden_features int Number of hidden features. 50 embedding_net_theta Module Optional embedding network for parameters \\(\\theta\\) . Identity() embedding_net_x Module Optional embedding network for simulation outputs \\(x\\) . This embedding net allows to learn features from potentially high-dimensional simulation outputs. Identity() kwargs additional custom arguments passed to downstream build functions. {} Source code in sbi/utils/get_nn_models.py def classifier_nn ( model : str , z_score_theta : Optional [ str ] = \"independent\" , z_score_x : Optional [ str ] = \"independent\" , hidden_features : int = 50 , embedding_net_theta : nn . Module = nn . Identity (), embedding_net_x : nn . Module = nn . Identity (), ** kwargs , ) -> Callable : r \"\"\" Returns a function that builds a classifier for learning density ratios. This function will usually be used for SNRE. The returned function is to be passed to the inference class when using the flexible interface. Note that in the view of the SNRE classifier we build below, x=theta and y=x. Args: model: The type of classifier that will be created. One of [`linear`, `mlp`, `resnet`]. z_score_theta: Whether to z-score parameters $\\theta$ before passing them into the network, can take one of the following: - `none`, or None: do not z-score. - `independent`: z-score each dimension independently. 
- `structured`: treat dimensions as related, therefore compute mean and std over the entire batch, instead of per-dimension. Should be used when each sample is, for example, a time series or an image. z_score_x: Whether to z-score simulation outputs $x$ before passing them into the network, same options as z_score_theta. hidden_features: Number of hidden features. embedding_net_theta: Optional embedding network for parameters $\\theta$. embedding_net_x: Optional embedding network for simulation outputs $x$. This embedding net allows to learn features from potentially high-dimensional simulation outputs. kwargs: additional custom arguments passed to downstream build functions. \"\"\" kwargs = dict ( zip ( ( \"z_score_x\" , \"z_score_y\" , \"hidden_features\" , \"embedding_net_x\" , \"embedding_net_y\" , ), ( z_score_theta , z_score_x , hidden_features , embedding_net_theta , embedding_net_x , ), ), ** kwargs , ) def build_fn ( batch_theta , batch_x ): if model == \"linear\" : return build_linear_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) if model == \"mlp\" : return build_mlp_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) if model == \"resnet\" : return build_resnet_classifier ( batch_x = batch_theta , batch_y = batch_x , ** kwargs ) else : raise NotImplementedError return build_fn","title":"classifier_nn()"},{"location":"reference/#potentials","text":"","title":"Potentials"},{"location":"reference/#sbi.inference.potentials.posterior_based_potential.posterior_estimator_based_potential","text":"Returns the potential for posterior-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. The potential is the same as the log-probability of the posterior_estimator , but it is set to \\(-\\inf\\) outside of the prior bounds. Parameters: Name Type Description Default posterior_estimator Module The neural network modelling the posterior. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the posterior. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/posterior_based_potential.py def posterior_estimator_based_potential ( posterior_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns the potential for posterior-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. The potential is the same as the log-probability of the `posterior_estimator`, but it is set to $-\\inf$ outside of the prior bounds. Args: posterior_estimator: The neural network modelling the posterior. prior: The prior distribution. x_o: The observed data at which to evaluate the posterior. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( posterior_estimator . parameters ()) . 
device ) potential_fn = PosteriorBasedPotential ( posterior_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform","title":"posterior_estimator_based_potential()"},{"location":"reference/#sbi.inference.potentials.likelihood_based_potential.likelihood_estimator_based_potential","text":"Returns potential \\(\\log(p(x_o|\\theta)p(\\theta))\\) for likelihood-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Parameters: Name Type Description Default likelihood_estimator Module The neural network modelling the likelihood. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the likelihood. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function \\(p(x_o|\\theta)p(\\theta)\\) and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/likelihood_based_potential.py def likelihood_estimator_based_potential ( likelihood_estimator : nn . Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns potential $\\log(p(x_o|\\theta)p(\\theta))$ for likelihood-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Args: likelihood_estimator: The neural network modelling the likelihood. prior: The prior distribution. x_o: The observed data at which to evaluate the likelihood. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function $p(x_o|\\theta)p(\\theta)$ and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( likelihood_estimator . parameters ()) . device ) potential_fn = LikelihoodBasedPotential ( likelihood_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform","title":"likelihood_estimator_based_potential()"},{"location":"reference/#sbi.inference.potentials.ratio_based_potential.ratio_estimator_based_potential","text":"Returns the potential for ratio-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Parameters: Name Type Description Default ratio_estimator Module The neural network modelling likelihood-to-evidence ratio. required prior Distribution The prior distribution. required x_o Optional[torch.Tensor] The observed data at which to evaluate the likelihood-to-evidence ratio. required enable_transform bool Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for theta_transform . True Returns: Type Description Tuple[Callable, torch Transform] The potential function and a transformation that maps to unconstrained space. Source code in sbi/inference/potentials/ratio_based_potential.py def ratio_estimator_based_potential ( ratio_estimator : nn . 
Module , prior : Distribution , x_o : Optional [ Tensor ], enable_transform : bool = True , ) -> Tuple [ Callable , TorchTransform ]: r \"\"\"Returns the potential for ratio-based methods. It also returns a transformation that can be used to transform the potential into unconstrained space. Args: ratio_estimator: The neural network modelling likelihood-to-evidence ratio. prior: The prior distribution. x_o: The observed data at which to evaluate the likelihood-to-evidence ratio. enable_transform: Whether to transform parameters to unconstrained space. When False, an identity transform will be returned for `theta_transform`. Returns: The potential function and a transformation that maps to unconstrained space. \"\"\" device = str ( next ( ratio_estimator . parameters ()) . device ) potential_fn = RatioBasedPotential ( ratio_estimator , prior , x_o , device = device ) theta_transform = mcmc_transform ( prior , device = device , enable_transform = enable_transform ) return potential_fn , theta_transform","title":"ratio_estimator_based_potential()"},{"location":"reference/#analysis","text":"","title":"Analysis"},{"location":"reference/#sbi.analysis.plot.pairplot","text":"Plot samples in a 2D grid showing marginals and pairwise marginals. Each of the diagonal plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Each upper-diagonal plot can be interpreted as a 2D-marginal of the distribution. Parameters: Name Type Description Default samples Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Samples used to build the histogram. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] List of additional points to scatter. None limits Union[List, torch.Tensor] Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on). None offdiag Union[str, List[str]] Plotting style for upper diagonal, {hist, scatter, contour, cond, None}. 'hist' upper Optional[str] deprecated, use offdiag instead. None diag Union[str, List[str]] Plotting style for diagonal, {hist, cond, None}. 'hist' figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def pairplot ( samples : Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , limits : Optional [ Union [ List , torch . Tensor ]] = None , subset : Optional [ List [ int ]] = None , offdiag : Optional [ Union [ List [ str ], str ]] = \"hist\" , diag : Optional [ Union [ List [ str ], str ]] = \"hist\" , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . 
Tensor ] = [], upper : Optional [ str ] = None , fig = None , axes = None , ** kwargs , ): \"\"\" Plot samples in a 2D grid showing marginals and pairwise marginals. Each of the diagonal plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Each upper-diagonal plot can be interpreted as a 2D-marginal of the distribution. Args: samples: Samples used to build the histogram. points: List of additional points to scatter. limits: Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on). offdiag: Plotting style for upper diagonal, {hist, scatter, contour, cond, None}. upper: deprecated, use offdiag instead. diag: Plotting style for diagonal, {hist, cond, None}. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" # TODO: add color map support # TODO: automatically determine good bin sizes for histograms # TODO: add legend (if legend is True) opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) samples , dim , limits = prepare_for_plot ( samples , limits ) # checks. if opts [ \"legend\" ]: assert len ( opts [ \"samples_labels\" ]) >= len ( samples ), \"Provide at least as many labels as samples.\" if opts [ \"upper\" ] is not None : warn ( \"upper is deprecated, use offdiag instead.\" ) opts [ \"offdiag\" ] = opts [ \"upper\" ] # Prepare diag/upper/lower if type ( opts [ \"diag\" ]) is not list : opts [ \"diag\" ] = [ opts [ \"diag\" ] for _ in range ( len ( samples ))] if type ( opts [ \"offdiag\" ]) is not list : opts [ \"offdiag\" ] = [ opts [ \"offdiag\" ] for _ in range ( len ( samples ))] # if type(opts['lower']) is not list: # opts['lower'] = [opts['lower'] for _ in range(len(samples))] opts [ \"lower\" ] = None diag_func = get_diag_func ( samples , limits , opts , ** kwargs ) def offdiag_func ( row , col , limits , ** kwargs ): if len ( samples ) > 0 : for n , v in enumerate ( samples ): if opts [ \"offdiag\" ][ n ] == \"hist\" or opts [ \"offdiag\" ][ n ] == \"hist2d\" : hist , xedges , yedges = np . histogram2d ( v [:, col ], v [:, row ], range = [ [ limits [ col ][ 0 ], limits [ col ][ 1 ]], [ limits [ row ][ 0 ], limits [ row ][ 1 ]], ], ** opts [ \"hist_offdiag\" ], ) plt . imshow ( hist . T , origin = \"lower\" , extent = ( xedges [ 0 ], xedges [ - 1 ], yedges [ 0 ], yedges [ - 1 ], ), aspect = \"auto\" , ) elif opts [ \"offdiag\" ][ n ] in [ \"kde\" , \"kde2d\" , \"contour\" , \"contourf\" , ]: density = gaussian_kde ( v [:, [ col , row ]] . T , bw_method = opts [ \"kde_offdiag\" ][ \"bw_method\" ], ) X , Y = np . meshgrid ( np . linspace ( limits [ col ][ 0 ], limits [ col ][ 1 ], opts [ \"kde_offdiag\" ][ \"bins\" ], ), np . linspace ( limits [ row ][ 0 ], limits [ row ][ 1 ], opts [ \"kde_offdiag\" ][ \"bins\" ], ), ) positions = np . vstack ([ X . 
ravel (), Y . ravel ()]) Z = np . reshape ( density ( positions ) . T , X . shape ) if opts [ \"offdiag\" ][ n ] == \"kde\" or opts [ \"offdiag\" ][ n ] == \"kde2d\" : plt . imshow ( Z , extent = ( limits [ col ][ 0 ], limits [ col ][ 1 ], limits [ row ][ 0 ], limits [ row ][ 1 ], ), origin = \"lower\" , aspect = \"auto\" , ) elif opts [ \"offdiag\" ][ n ] == \"contour\" : if opts [ \"contour_offdiag\" ][ \"percentile\" ]: Z = probs2contours ( Z , opts [ \"contour_offdiag\" ][ \"levels\" ]) else : Z = ( Z - Z . min ()) / ( Z . max () - Z . min ()) plt . contour ( X , Y , Z , origin = \"lower\" , extent = [ limits [ col ][ 0 ], limits [ col ][ 1 ], limits [ row ][ 0 ], limits [ row ][ 1 ], ], colors = opts [ \"samples_colors\" ][ n ], levels = opts [ \"contour_offdiag\" ][ \"levels\" ], ) else : pass elif opts [ \"offdiag\" ][ n ] == \"scatter\" : plt . scatter ( v [:, col ], v [:, row ], color = opts [ \"samples_colors\" ][ n ], ** opts [ \"scatter_offdiag\" ], ) elif opts [ \"offdiag\" ][ n ] == \"plot\" : plt . plot ( v [:, col ], v [:, row ], color = opts [ \"samples_colors\" ][ n ], ** opts [ \"plot_offdiag\" ], ) else : pass return _arrange_plots ( diag_func , offdiag_func , dim , limits , points , opts , fig = fig , axes = axes )","title":"pairplot()"},{"location":"reference/#sbi.analysis.plot.marginal_plot","text":"Plot samples in a row showing 1D marginals of selected dimensions. Each of the plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Parameters: Name Type Description Default samples Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Samples used to build the histogram. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] List of additional points to scatter. None limits Union[List, torch.Tensor] Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on). None diag Optional[str] Plotting style for 1D marginals, {hist, kde cond, None}. 'hist' figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] points_colors Colors of the points . required fig matplotlib figure to plot on. None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def marginal_plot ( samples : Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , limits : Optional [ Union [ List , torch . Tensor ]] = None , subset : Optional [ List [ int ]] = None , diag : Optional [ str ] = \"hist\" , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], fig = None , axes = None , ** kwargs , ): \"\"\" Plot samples in a row showing 1D marginals of selected dimensions. 
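For orientation, a minimal hedged pairplot example on synthetic samples (purely illustrative, not part of the reference itself):

import torch
from sbi.analysis import pairplot

# 1000 samples from a 3D standard normal, shown as 1D marginals plus pairwise 2D marginals.
samples = torch.randn(1000, 3)
fig, axes = pairplot(
    samples,
    limits=[[-3, 3]] * 3,
    figsize=(5, 5),
    labels=[r"$\theta_1$", r"$\theta_2$", r"$\theta_3$"],
)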
Each of the plots can be interpreted as a 1D-marginal of the distribution that the samples were drawn from. Args: samples: Samples used to build the histogram. points: List of additional points to scatter. limits: Array containing the plot xlim for each parameter dimension. If None, just use the min and max of the passed samples subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on). diag: Plotting style for 1D marginals, {hist, kde cond, None}. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. points_colors: Colors of the `points`. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) samples , dim , limits = prepare_for_plot ( samples , limits ) # Prepare diag/upper/lower if type ( opts [ \"diag\" ]) is not list : opts [ \"diag\" ] = [ opts [ \"diag\" ] for _ in range ( len ( samples ))] diag_func = get_diag_func ( samples , limits , opts , ** kwargs ) return _arrange_plots ( diag_func , None , dim , limits , points , opts , fig = fig , axes = axes )","title":"marginal_plot()"},{"location":"reference/#sbi.analysis.plot.conditional_pairplot","text":"Plot conditional distribution given all other parameters. The conditionals can be interpreted as slices through the density at a location given by condition . For example: Say we have a 3D density with parameters \\(\\theta_0\\) , \\(\\theta_1\\) , \\(\\theta_2\\) and a condition \\(c\\) passed by the user in the condition argument. For the plot of \\(\\theta_0\\) on the diagonal, this will plot the conditional \\(p(\\theta_0 | \\theta_1=c[1], \\theta_2=c[2])\\) . For the upper diagonal of \\(\\theta_1\\) and \\(\\theta_2\\) , it will plot \\(p(\\theta_1, \\theta_2 | \\theta_0=c[0])\\) . All other diagonals and upper-diagonals are built in the corresponding way. Parameters: Name Type Description Default density Any Probability density with a log_prob() method. required condition Tensor Condition that all but the one/two regarded parameters are fixed to. The condition should be of shape (1, dim_theta), i.e. it could e.g. be a sample from the posterior distribution. required limits Union[List, torch.Tensor] Limits in between which each parameter will be evaluated. required points Union[List[numpy.ndarray], List[torch.Tensor], numpy.ndarray, torch.Tensor] Additional points to scatter. None subset Optional[List[int]] List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1 st and 3 rd dimension but will discard the 0 th and 2 nd (and, if they exist, the 4 th , 5 th and so on) None resolution int Resolution of the grid at which we evaluate the pdf . 50 figsize Tuple Size of the entire figure. (10, 10) labels Optional[List[str]] List of strings specifying the names of the parameters. None ticks Union[List, torch.Tensor] Position of the ticks. [] points_colors Colors of the points . required fig matplotlib figure to plot on. 
None axes matplotlib axes corresponding to fig. None **kwargs Additional arguments to adjust the plot, e.g., samples_colors , points_colors and many more, see the source code in _get_default_opts() in sbi.analysis.plot for details. {} Returns: figure and axis of posterior distribution plot Source code in sbi/analysis/plot.py def conditional_pairplot ( density : Any , condition : torch . Tensor , limits : Union [ List , torch . Tensor ], points : Optional [ Union [ List [ np . ndarray ], List [ torch . Tensor ], np . ndarray , torch . Tensor ] ] = None , subset : Optional [ List [ int ]] = None , resolution : int = 50 , figsize : Tuple = ( 10 , 10 ), labels : Optional [ List [ str ]] = None , ticks : Union [ List , torch . Tensor ] = [], fig = None , axes = None , ** kwargs , ): r \"\"\" Plot conditional distribution given all other parameters. The conditionals can be interpreted as slices through the `density` at a location given by `condition`. For example: Say we have a 3D density with parameters $\\theta_0$, $\\theta_1$, $\\theta_2$ and a condition $c$ passed by the user in the `condition` argument. For the plot of $\\theta_0$ on the diagonal, this will plot the conditional $p(\\theta_0 | \\theta_1=c[1], \\theta_2=c[2])$. For the upper diagonal of $\\theta_1$ and $\\theta_2$, it will plot $p(\\theta_1, \\theta_2 | \\theta_0=c[0])$. All other diagonals and upper-diagonals are built in the corresponding way. Args: density: Probability density with a `log_prob()` method. condition: Condition that all but the one/two regarded parameters are fixed to. The condition should be of shape (1, dim_theta), i.e. it could e.g. be a sample from the posterior distribution. limits: Limits in between which each parameter will be evaluated. points: Additional points to scatter. subset: List containing the dimensions to plot. E.g. subset=[1,3] will plot plot only the 1st and 3rd dimension but will discard the 0th and 2nd (and, if they exist, the 4th, 5th and so on) resolution: Resolution of the grid at which we evaluate the `pdf`. figsize: Size of the entire figure. labels: List of strings specifying the names of the parameters. ticks: Position of the ticks. points_colors: Colors of the `points`. fig: matplotlib figure to plot on. axes: matplotlib axes corresponding to fig. **kwargs: Additional arguments to adjust the plot, e.g., `samples_colors`, `points_colors` and many more, see the source code in `_get_default_opts()` in `sbi.analysis.plot` for details. Returns: figure and axis of posterior distribution plot \"\"\" device = density . _device if hasattr ( density , \"_device\" ) else \"cpu\" # Setting these is required because _pairplot_scaffold will check if opts['diag'] is # `None`. This would break if opts has no key 'diag'. Same for 'upper'. diag = \"cond\" offdiag = \"cond\" opts = _get_default_opts () # update the defaults dictionary by the current values of the variables (passed by # the user) opts = _update ( opts , locals ()) opts = _update ( opts , kwargs ) opts [ \"lower\" ] = None dim , limits , eps_margins = prepare_for_conditional_plot ( condition , opts ) diag_func = get_conditional_diag_func ( opts , limits , eps_margins , resolution ) def offdiag_func ( row , col , ** kwargs ): p_image = ( eval_conditional_density ( opts [ \"density\" ], opts [ \"condition\" ] . to ( device ), limits . to ( device ), row , col , resolution = resolution , eps_margins1 = eps_margins [ row ], eps_margins2 = eps_margins [ col ], ) . to ( \"cpu\" ) . numpy () ) plt . imshow ( p_image . 
T , origin = \"lower\" , extent = ( limits [ col , 0 ] . item (), limits [ col , 1 ] . item (), limits [ row , 0 ] . item (), limits [ row , 1 ] . item (), ), aspect = \"auto\" , ) return _arrange_plots ( diag_func , offdiag_func , dim , limits , points , opts , fig = fig , axes = axes )","title":"conditional_pairplot()"},{"location":"reference/#sbi.analysis.conditional_density.conditional_corrcoeff","text":"Returns the conditional correlation matrix of a distribution. To compute the conditional distribution, we condition all but two parameters to values from condition , and then compute the Pearson correlation coefficient \\(\\rho\\) between the remaining two parameters under the distribution density . We do so for any pair of parameters specified in subset , thus creating a matrix containing conditional correlations between any pair of parameters. If condition is a batch of conditions, this function computes the conditional correlation matrix for each one of them and returns the mean. Parameters: Name Type Description Default density Any Probability density function with .log_prob() function. required limits Tensor Limits within which to evaluate the density . required condition Tensor Values to condition the density on. If a batch of conditions is passed, we compute the conditional correlation matrix for each of them and return the average conditional correlation matrix. required subset Optional[List[int]] Evaluate the conditional distribution only on a subset of dimensions. If None this function uses all dimensions. None resolution int Number of grid points on which the conditional distribution is evaluated. A higher value increases the accuracy of the estimated correlation but also increases the computational cost. 50 Returns: Average conditional correlation matrix of shape either (num_dim, num_dim) or (len(subset), len(subset)) if subset was specified. Source code in sbi/analysis/conditional_density.py def conditional_corrcoeff ( density : Any , limits : Tensor , condition : Tensor , subset : Optional [ List [ int ]] = None , resolution : int = 50 , ) -> Tensor : r \"\"\"Returns the conditional correlation matrix of a distribution. To compute the conditional distribution, we condition all but two parameters to values from `condition`, and then compute the Pearson correlation coefficient $\\rho$ between the remaining two parameters under the distribution `density`. We do so for any pair of parameters specified in `subset`, thus creating a matrix containing conditional correlations between any pair of parameters. If `condition` is a batch of conditions, this function computes the conditional correlation matrix for each one of them and returns the mean. Args: density: Probability density function with `.log_prob()` function. limits: Limits within which to evaluate the `density`. condition: Values to condition the `density` on. If a batch of conditions is passed, we compute the conditional correlation matrix for each of them and return the average conditional correlation matrix. subset: Evaluate the conditional distribution only on a subset of dimensions. If `None` this function uses all dimensions. resolution: Number of grid points on which the conditional distribution is evaluated. A higher value increases the accuracy of the estimated correlation but also increases the computational cost. Returns: Average conditional correlation matrix of shape either `(num_dim, num_dim)` or `(len(subset), len(subset))` if `subset` was specified. \"\"\" device = density . 
_device if hasattr ( density , \"_device\" ) else \"cpu\" subset_ = subset if subset is not None else range ( condition . shape [ 1 ]) correlation_matrices = [] for cond in condition : correlation_matrices . append ( torch . stack ( [ compute_corrcoeff ( eval_conditional_density ( density , cond . to ( device ), limits . to ( device ), dim1 = dim1 , dim2 = dim2 , resolution = resolution , ), limits [[ dim1 , dim2 ]] . to ( device ), ) for dim1 in subset_ for dim2 in subset_ if dim1 < dim2 ] ) ) average_correlations = torch . mean ( torch . stack ( correlation_matrices ), dim = 0 ) # `average_correlations` is still a vector containing the upper triangular entries. # Below, assemble them into a matrix: av_correlation_matrix = torch . zeros (( len ( subset_ ), len ( subset_ )), device = device ) triu_indices = torch . triu_indices ( row = len ( subset_ ), col = len ( subset_ ), offset = 1 , device = device ) av_correlation_matrix [ triu_indices [ 0 ], triu_indices [ 1 ]] = average_correlations # Make the matrix symmetric by copying upper diagonal to lower diagonal. av_correlation_matrix = torch . triu ( av_correlation_matrix ) + torch . tril ( av_correlation_matrix . T ) av_correlation_matrix . fill_diagonal_ ( 1.0 ) return av_correlation_matrix","title":"conditional_corrcoeff()"},{"location":"examples/00_HH_simulator/","text":"Inference on Hodgkin-Huxley model: tutorial \u00b6 In this tutorial, we use sbi to do inference on a Hodgkin-Huxley model from neuroscience (Hodgkin and Huxley, 1952). We will learn two parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ) based on a current-clamp recording, that we generate synthetically (in practice, this would be an experimental observation). Note, you find the original version of this notebook at https://github.com/sbi-dev/sbi/blob/main/examples/00_HH_simulator.ipynb in the sbi repository. First we are going to import basic packages. import numpy as np import torch # visualization import matplotlib as mpl import matplotlib.pyplot as plt # sbi from sbi import utils as utils from sbi import analysis as analysis from sbi.inference.base import infer # remove top and right axis from plots mpl . rcParams [ \"axes.spines.right\" ] = False mpl . rcParams [ \"axes.spines.top\" ] = False Different required components \u00b6 Before running inference, let us define the different required components: observed data prior over model parameters simulator 1. Observed data \u00b6 Let us assume we current-clamped a neuron and recorded the following voltage trace: In fact, this voltage trace was not measured experimentally but synthetically generated by simulating a Hodgkin-Huxley model with particular parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ). We will come back to this point later in the tutorial. 2. Simulator \u00b6 We would like to infer the posterior over the two parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ) of a Hodgkin-Huxley model, given the observed electrophysiological recording above. The model has channel kinetics as in Pospischil et al. 
2008 , and is defined by the following set of differential equations (parameters of interest highlighted in orange): \\[ \\scriptsize \\begin{align} C_m\\frac{dV}{dt}&=g_1\\left(E_1-V\\right)+ \\color{orange}{\\bar{g}_{Na}}m^3h\\left(E_{Na}-V\\right)+ \\color{orange}{\\bar{g}_{K}}n^4\\left(E_K-V\\right)+ \\bar{g}_Mp\\left(E_K-V\\right)+ I_{inj}+ \\sigma\\eta\\left(t\\right)\\\\ \\frac{dq}{dt}&=\\frac{q_\\infty\\left(V\\right)-q}{\\tau_q\\left(V\\right)},\\;q\\in\\{m,h,n,p\\} \\end{align} \\] Above, \\(V\\) represents the membrane potential, \\(C_m\\) is the membrane capacitance, \\(g_{\\text{l}}\\) is the leak conductance, \\(E_{\\text{l}}\\) is the membrane reversal potential, \\(\\bar{g}_c\\) is the density of channels of type \\(c\\) ( \\(\\text{Na}^+\\) , \\(\\text{K}^+\\) , M), \\(E_c\\) is the reversal potential of \\(c\\) , ( \\(m\\) , \\(h\\) , \\(n\\) , \\(p\\) ) are the respective channel gating kinetic variables, and \\(\\sigma \\eta(t)\\) is the intrinsic neural noise. The right hand side of the voltage dynamics is composed of a leak current, a voltage-dependent \\(\\text{Na}^+\\) current, a delayed-rectifier \\(\\text{K}^+\\) current, a slow voltage-dependent \\(\\text{K}^+\\) current responsible for spike-frequency adaptation, and an injected current \\(I_{\\text{inj}}\\) . Channel gating variables \\(q\\) have dynamics fully characterized by the neuron membrane potential \\(V\\) , given the respective steady-state \\(q_{\\infty}(V)\\) and time constant \\(\\tau_{q}(V)\\) (details in Pospischil et al. 2008). The input current \\(I_{\\text{inj}}\\) is defined as from HH_helper_functions import syn_current I , t_on , t_off , dt , t , A_soma = syn_current () The Hodgkin-Huxley simulator is given by: from HH_helper_functions import HHsimulator Putting the input current and the simulator together: def run_HH_model ( params ): params = np . asarray ( params ) # input current, time step I , t_on , t_off , dt , t , A_soma = syn_current () t = np . arange ( 0 , len ( I ), 1 ) * dt # initial voltage V0 = - 70 states = HHsimulator ( V0 , params . reshape ( 1 , - 1 ), dt , t , I ) return dict ( data = states . reshape ( - 1 ), time = t , dt = dt , I = I . reshape ( - 1 )) To get an idea of the output of the Hodgkin-Huxley model, let us generate some voltage traces for different parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), given the input current \\(I_{\\text{inj}}\\) : # three sets of (g_Na, g_K) params = np . array ([[ 50.0 , 1.0 ], [ 4.0 , 1.5 ], [ 20.0 , 15.0 ]]) num_samples = len ( params [:, 0 ]) sim_samples = np . zeros (( num_samples , len ( I ))) for i in range ( num_samples ): sim_samples [ i , :] = run_HH_model ( params = params [ i , :])[ \"data\" ] # colors for traces col_min = 2 num_colors = num_samples + col_min cm1 = mpl . cm . Blues col1 = [ cm1 ( 1.0 * i / num_colors ) for i in range ( col_min , num_colors )] fig = plt . figure ( figsize = ( 7 , 5 )) gs = mpl . gridspec . GridSpec ( 2 , 1 , height_ratios = [ 4 , 1 ]) ax = plt . subplot ( gs [ 0 ]) for i in range ( num_samples ): plt . plot ( t , sim_samples [ i , :], color = col1 [ i ], lw = 2 ) plt . ylabel ( \"voltage (mV)\" ) ax . set_xticks ([]) ax . set_yticks ([ - 80 , - 20 , 40 ]) ax = plt . subplot ( gs [ 1 ]) plt . plot ( t , I * A_soma * 1e3 , \"k\" , lw = 2 ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"input (nA)\" ) ax . set_xticks ([ 0 , max ( t ) / 2 , max ( t )]) ax . set_yticks ([ 0 , 1.1 * np . max ( I * A_soma * 1e3 )]) ax . yaxis . set_major_formatter ( mpl . ticker . 
FormatStrFormatter ( \" %.2f \" )) plt . show () As can be seen, the voltage traces can be quite diverse for different parameter values. Often, we are not interested in matching the exact trace, but only in matching certain features thereof. In this example of the Hodgkin-Huxley model, the summary features are the number of spikes, the mean resting potential, the standard deviation of the resting potential, and the first four voltage moments: mean, standard deviation, skewness and kurtosis. Using the function calculate_summary_statistics() imported below, we obtain these statistics from the output of the Hodgkin Huxley simulator. from HH_helper_functions import calculate_summary_statistics Lastly, we define a function that performs all of the above steps at once. The function simulation_wrapper takes in conductance values, runs the Hodgkin Huxley model and then returns the summary statistics. def simulation_wrapper ( params ): \"\"\" Returns summary statistics from conductance values in `params`. Summarizes the output of the HH simulator and converts it to `torch.Tensor`. \"\"\" obs = run_HH_model ( params ) summstats = torch . as_tensor ( calculate_summary_statistics ( obs )) return summstats sbi takes any function as simulator. Thus, sbi also has the flexibility to use simulators that utilize external packages, e.g., Brian ( http://briansimulator.org/ ), nest ( https://www.nest-simulator.org/ ), or NEURON ( https://neuron.yale.edu/neuron/ ). External simulators do not even need to be Python-based as long as they store simulation outputs in a format that can be read from Python. All that is necessary is to wrap your external simulator of choice into a Python callable that takes a parameter set and outputs a set of summary statistics we want to fit the parameters to. 3. Prior over model parameters \u00b6 Now that we have the simulator, we need to define a function with the prior over the model parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), which in this case is chosen to be a Uniform distribution: prior_min = [ 0.5 , 1e-4 ] prior_max = [ 80.0 , 15.0 ] prior = utils . torchutils . BoxUniform ( low = torch . as_tensor ( prior_min ), high = torch . as_tensor ( prior_max ) ) Inference \u00b6 Now that we have all the required components, we can run inference with SNPE to identify parameters whose activity matches this trace. posterior = infer ( simulation_wrapper , prior , method = \"SNPE\" , num_simulations = 300 , num_workers = 4 ) HBox(children=(FloatProgress(value=0.0, description='Running 300 simulations in 300 batches.', max=300.0, styl\u2026 Neural network successfully converged after 233 epochs. Note sbi can parallelize your simulator. If you experience problems with parallelization, try setting num_workers=1 and please give us an error report as a GitHub issue . Coming back to the observed data \u00b6 As mentioned at the beginning of the tutorial, the observed data are generated by the Hodgkin-Huxley model with a set of known parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ). To illustrate how to compute the summary statistics of the observed data, let us regenerate the observed data: # true parameters and respective labels true_params = np . array ([ 50.0 , 5.0 ]) labels_params = [ r \"$g_ {Na} $\" , r \"$g_ {K} $\" ] observation_trace = run_HH_model ( true_params ) observation_summary_statistics = calculate_summary_statistics ( observation_trace ) As we already shown above, the observed voltage traces look as follows: fig = plt . figure ( figsize = ( 7 , 5 )) gs = mpl . 
gridspec . GridSpec ( 2 , 1 , height_ratios = [ 4 , 1 ]) ax = plt . subplot ( gs [ 0 ]) plt . plot ( observation_trace [ \"time\" ], observation_trace [ \"data\" ]) plt . ylabel ( \"voltage (mV)\" ) plt . title ( \"observed data\" ) plt . setp ( ax , xticks = [], yticks = [ - 80 , - 20 , 40 ]) ax = plt . subplot ( gs [ 1 ]) plt . plot ( observation_trace [ \"time\" ], I * A_soma * 1e3 , \"k\" , lw = 2 ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"input (nA)\" ) ax . set_xticks ([ 0 , max ( observation_trace [ \"time\" ]) / 2 , max ( observation_trace [ \"time\" ])]) ax . set_yticks ([ 0 , 1.1 * np . max ( I * A_soma * 1e3 )]) ax . yaxis . set_major_formatter ( mpl . ticker . FormatStrFormatter ( \" %.2f \" )) Analysis of the posterior given the observed data \u00b6 After running the inference algorithm, let us inspect the inferred posterior distribution over the parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), given the observed trace. To do so, we first draw samples (i.e. consistent parameter sets) from the posterior: samples = posterior . sample (( 10000 ,), x = observation_summary_statistics ) HBox(children=(FloatProgress(value=0.0, description='Drawing 10000 posterior samples', max=10000.0, style=Prog\u2026 fig , axes = analysis . pairplot ( samples , limits = [[ 0.5 , 80 ], [ 1e-4 , 15.0 ]], ticks = [[ 0.5 , 80 ], [ 1e-4 , 15.0 ]], figsize = ( 5 , 5 ), points = true_params , points_offdiag = { \"markersize\" : 6 }, points_colors = \"r\" , ); As can be seen, the inferred posterior contains the ground-truth parameters (red) in a high-probability region. Now, let us sample parameters from the posterior distribution, simulate the Hodgkin-Huxley model for this parameter set and compare the simulations with the observed data: # Draw a sample from the posterior and convert to numpy for plotting. posterior_sample = posterior . sample (( 1 ,), x = observation_summary_statistics ) . numpy () HBox(children=(FloatProgress(value=0.0, description='Drawing 1 posterior samples', max=1.0, style=ProgressStyl\u2026 fig = plt . figure ( figsize = ( 7 , 5 )) # plot observation t = observation_trace [ \"time\" ] y_obs = observation_trace [ \"data\" ] plt . plot ( t , y_obs , lw = 2 , label = \"observation\" ) # simulate and plot samples x = run_HH_model ( posterior_sample ) plt . plot ( t , x [ \"data\" ], \"--\" , lw = 2 , label = \"posterior sample\" ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"voltage (mV)\" ) ax = plt . gca () handles , labels = ax . get_legend_handles_labels () ax . legend ( handles [:: - 1 ], labels [:: - 1 ], bbox_to_anchor = ( 1.3 , 1 ), loc = \"upper right\" ) ax . set_xticks ([ 0 , 60 , 120 ]) ax . set_yticks ([ - 80 , - 20 , 40 ]); As can be seen, the sample from the inferred posterior leads to simulations that closely resemble the observed data, confirming that SNPE did a good job at capturing the observed data in this simple case. References \u00b6 A. L. Hodgkin and A. F. Huxley. A quantitative description of membrane current and its application to conduction and excitation in nerve. The Journal of Physiology, 117(4):500\u2013544, 1952. M. Pospischil, M. Toledo-Rodriguez, C. Monier, Z. Piwkowska, T. Bal, Y. Fr\u00e9gnac, H. Markram, and A. Destexhe. Minimal Hodgkin-Huxley type models for different classes of cortical and thalamic neurons. 
Biological Cybernetics, 99(4-5), 2008.","title":"Hodgkin-Huxley example"},{"location":"examples/00_HH_simulator/#inference-on-hodgkin-huxley-model-tutorial","text":"In this tutorial, we use sbi to do inference on a Hodgkin-Huxley model from neuroscience (Hodgkin and Huxley, 1952). We will learn two parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ) based on a current-clamp recording, that we generate synthetically (in practice, this would be an experimental observation). Note, you find the original version of this notebook at https://github.com/sbi-dev/sbi/blob/main/examples/00_HH_simulator.ipynb in the sbi repository. First we are going to import basic packages. import numpy as np import torch # visualization import matplotlib as mpl import matplotlib.pyplot as plt # sbi from sbi import utils as utils from sbi import analysis as analysis from sbi.inference.base import infer # remove top and right axis from plots mpl . rcParams [ \"axes.spines.right\" ] = False mpl . rcParams [ \"axes.spines.top\" ] = False","title":"Inference on Hodgkin-Huxley model: tutorial"},{"location":"examples/00_HH_simulator/#different-required-components","text":"Before running inference, let us define the different required components: observed data prior over model parameters simulator","title":"Different required components"},{"location":"examples/00_HH_simulator/#1-observed-data","text":"Let us assume we current-clamped a neuron and recorded the following voltage trace: In fact, this voltage trace was not measured experimentally but synthetically generated by simulating a Hodgkin-Huxley model with particular parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ). We will come back to this point later in the tutorial.","title":"1. Observed data"},{"location":"examples/00_HH_simulator/#2-simulator","text":"We would like to infer the posterior over the two parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ) of a Hodgkin-Huxley model, given the observed electrophysiological recording above. The model has channel kinetics as in Pospischil et al. 2008 , and is defined by the following set of differential equations (parameters of interest highlighted in orange): \\[ \\scriptsize \\begin{align} C_m\\frac{dV}{dt}&=g_1\\left(E_1-V\\right)+ \\color{orange}{\\bar{g}_{Na}}m^3h\\left(E_{Na}-V\\right)+ \\color{orange}{\\bar{g}_{K}}n^4\\left(E_K-V\\right)+ \\bar{g}_Mp\\left(E_K-V\\right)+ I_{inj}+ \\sigma\\eta\\left(t\\right)\\\\ \\frac{dq}{dt}&=\\frac{q_\\infty\\left(V\\right)-q}{\\tau_q\\left(V\\right)},\\;q\\in\\{m,h,n,p\\} \\end{align} \\] Above, \\(V\\) represents the membrane potential, \\(C_m\\) is the membrane capacitance, \\(g_{\\text{l}}\\) is the leak conductance, \\(E_{\\text{l}}\\) is the membrane reversal potential, \\(\\bar{g}_c\\) is the density of channels of type \\(c\\) ( \\(\\text{Na}^+\\) , \\(\\text{K}^+\\) , M), \\(E_c\\) is the reversal potential of \\(c\\) , ( \\(m\\) , \\(h\\) , \\(n\\) , \\(p\\) ) are the respective channel gating kinetic variables, and \\(\\sigma \\eta(t)\\) is the intrinsic neural noise. The right hand side of the voltage dynamics is composed of a leak current, a voltage-dependent \\(\\text{Na}^+\\) current, a delayed-rectifier \\(\\text{K}^+\\) current, a slow voltage-dependent \\(\\text{K}^+\\) current responsible for spike-frequency adaptation, and an injected current \\(I_{\\text{inj}}\\) . 
Channel gating variables \\(q\\) have dynamics fully characterized by the neuron membrane potential \\(V\\) , given the respective steady-state \\(q_{\\infty}(V)\\) and time constant \\(\\tau_{q}(V)\\) (details in Pospischil et al. 2008). The input current \\(I_{\\text{inj}}\\) is defined as from HH_helper_functions import syn_current I , t_on , t_off , dt , t , A_soma = syn_current () The Hodgkin-Huxley simulator is given by: from HH_helper_functions import HHsimulator Putting the input current and the simulator together: def run_HH_model ( params ): params = np . asarray ( params ) # input current, time step I , t_on , t_off , dt , t , A_soma = syn_current () t = np . arange ( 0 , len ( I ), 1 ) * dt # initial voltage V0 = - 70 states = HHsimulator ( V0 , params . reshape ( 1 , - 1 ), dt , t , I ) return dict ( data = states . reshape ( - 1 ), time = t , dt = dt , I = I . reshape ( - 1 )) To get an idea of the output of the Hodgkin-Huxley model, let us generate some voltage traces for different parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), given the input current \\(I_{\\text{inj}}\\) : # three sets of (g_Na, g_K) params = np . array ([[ 50.0 , 1.0 ], [ 4.0 , 1.5 ], [ 20.0 , 15.0 ]]) num_samples = len ( params [:, 0 ]) sim_samples = np . zeros (( num_samples , len ( I ))) for i in range ( num_samples ): sim_samples [ i , :] = run_HH_model ( params = params [ i , :])[ \"data\" ] # colors for traces col_min = 2 num_colors = num_samples + col_min cm1 = mpl . cm . Blues col1 = [ cm1 ( 1.0 * i / num_colors ) for i in range ( col_min , num_colors )] fig = plt . figure ( figsize = ( 7 , 5 )) gs = mpl . gridspec . GridSpec ( 2 , 1 , height_ratios = [ 4 , 1 ]) ax = plt . subplot ( gs [ 0 ]) for i in range ( num_samples ): plt . plot ( t , sim_samples [ i , :], color = col1 [ i ], lw = 2 ) plt . ylabel ( \"voltage (mV)\" ) ax . set_xticks ([]) ax . set_yticks ([ - 80 , - 20 , 40 ]) ax = plt . subplot ( gs [ 1 ]) plt . plot ( t , I * A_soma * 1e3 , \"k\" , lw = 2 ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"input (nA)\" ) ax . set_xticks ([ 0 , max ( t ) / 2 , max ( t )]) ax . set_yticks ([ 0 , 1.1 * np . max ( I * A_soma * 1e3 )]) ax . yaxis . set_major_formatter ( mpl . ticker . FormatStrFormatter ( \" %.2f \" )) plt . show () As can be seen, the voltage traces can be quite diverse for different parameter values. Often, we are not interested in matching the exact trace, but only in matching certain features thereof. In this example of the Hodgkin-Huxley model, the summary features are the number of spikes, the mean resting potential, the standard deviation of the resting potential, and the first four voltage moments: mean, standard deviation, skewness and kurtosis. Using the function calculate_summary_statistics() imported below, we obtain these statistics from the output of the Hodgkin Huxley simulator. from HH_helper_functions import calculate_summary_statistics Lastly, we define a function that performs all of the above steps at once. The function simulation_wrapper takes in conductance values, runs the Hodgkin Huxley model and then returns the summary statistics. def simulation_wrapper ( params ): \"\"\" Returns summary statistics from conductance values in `params`. Summarizes the output of the HH simulator and converts it to `torch.Tensor`. \"\"\" obs = run_HH_model ( params ) summstats = torch . as_tensor ( calculate_summary_statistics ( obs )) return summstats sbi takes any function as simulator. 
Thus, sbi also has the flexibility to use simulators that utilize external packages, e.g., Brian ( http://briansimulator.org/ ), nest ( https://www.nest-simulator.org/ ), or NEURON ( https://neuron.yale.edu/neuron/ ). External simulators do not even need to be Python-based as long as they store simulation outputs in a format that can be read from Python. All that is necessary is to wrap your external simulator of choice into a Python callable that takes a parameter set and outputs a set of summary statistics we want to fit the parameters to.","title":"2. Simulator"},{"location":"examples/00_HH_simulator/#3-prior-over-model-parameters","text":"Now that we have the simulator, we need to define a function with the prior over the model parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), which in this case is chosen to be a Uniform distribution: prior_min = [ 0.5 , 1e-4 ] prior_max = [ 80.0 , 15.0 ] prior = utils . torchutils . BoxUniform ( low = torch . as_tensor ( prior_min ), high = torch . as_tensor ( prior_max ) )","title":"3. Prior over model parameters"},{"location":"examples/00_HH_simulator/#inference","text":"Now that we have all the required components, we can run inference with SNPE to identify parameters whose activity matches this trace. posterior = infer ( simulation_wrapper , prior , method = \"SNPE\" , num_simulations = 300 , num_workers = 4 ) HBox(children=(FloatProgress(value=0.0, description='Running 300 simulations in 300 batches.', max=300.0, styl\u2026 Neural network successfully converged after 233 epochs. Note sbi can parallelize your simulator. If you experience problems with parallelization, try setting num_workers=1 and please give us an error report as a GitHub issue .","title":"Inference"},{"location":"examples/00_HH_simulator/#coming-back-to-the-observed-data","text":"As mentioned at the beginning of the tutorial, the observed data are generated by the Hodgkin-Huxley model with a set of known parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ). To illustrate how to compute the summary statistics of the observed data, let us regenerate the observed data: # true parameters and respective labels true_params = np . array ([ 50.0 , 5.0 ]) labels_params = [ r \"$g_ {Na} $\" , r \"$g_ {K} $\" ] observation_trace = run_HH_model ( true_params ) observation_summary_statistics = calculate_summary_statistics ( observation_trace ) As we already shown above, the observed voltage traces look as follows: fig = plt . figure ( figsize = ( 7 , 5 )) gs = mpl . gridspec . GridSpec ( 2 , 1 , height_ratios = [ 4 , 1 ]) ax = plt . subplot ( gs [ 0 ]) plt . plot ( observation_trace [ \"time\" ], observation_trace [ \"data\" ]) plt . ylabel ( \"voltage (mV)\" ) plt . title ( \"observed data\" ) plt . setp ( ax , xticks = [], yticks = [ - 80 , - 20 , 40 ]) ax = plt . subplot ( gs [ 1 ]) plt . plot ( observation_trace [ \"time\" ], I * A_soma * 1e3 , \"k\" , lw = 2 ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"input (nA)\" ) ax . set_xticks ([ 0 , max ( observation_trace [ \"time\" ]) / 2 , max ( observation_trace [ \"time\" ])]) ax . set_yticks ([ 0 , 1.1 * np . max ( I * A_soma * 1e3 )]) ax . yaxis . set_major_formatter ( mpl . ticker . FormatStrFormatter ( \" %.2f \" ))","title":"Coming back to the observed data"},{"location":"examples/00_HH_simulator/#analysis-of-the-posterior-given-the-observed-data","text":"After running the inference algorithm, let us inspect the inferred posterior distribution over the parameters ( \\(\\bar g_{Na}\\) , \\(\\bar g_K\\) ), given the observed trace. 
To do so, we first draw samples (i.e. consistent parameter sets) from the posterior: samples = posterior . sample (( 10000 ,), x = observation_summary_statistics ) HBox(children=(FloatProgress(value=0.0, description='Drawing 10000 posterior samples', max=10000.0, style=Prog\u2026 fig , axes = analysis . pairplot ( samples , limits = [[ 0.5 , 80 ], [ 1e-4 , 15.0 ]], ticks = [[ 0.5 , 80 ], [ 1e-4 , 15.0 ]], figsize = ( 5 , 5 ), points = true_params , points_offdiag = { \"markersize\" : 6 }, points_colors = \"r\" , ); As can be seen, the inferred posterior contains the ground-truth parameters (red) in a high-probability region. Now, let us sample parameters from the posterior distribution, simulate the Hodgkin-Huxley model for this parameter set and compare the simulations with the observed data: # Draw a sample from the posterior and convert to numpy for plotting. posterior_sample = posterior . sample (( 1 ,), x = observation_summary_statistics ) . numpy () HBox(children=(FloatProgress(value=0.0, description='Drawing 1 posterior samples', max=1.0, style=ProgressStyl\u2026 fig = plt . figure ( figsize = ( 7 , 5 )) # plot observation t = observation_trace [ \"time\" ] y_obs = observation_trace [ \"data\" ] plt . plot ( t , y_obs , lw = 2 , label = \"observation\" ) # simulate and plot samples x = run_HH_model ( posterior_sample ) plt . plot ( t , x [ \"data\" ], \"--\" , lw = 2 , label = \"posterior sample\" ) plt . xlabel ( \"time (ms)\" ) plt . ylabel ( \"voltage (mV)\" ) ax = plt . gca () handles , labels = ax . get_legend_handles_labels () ax . legend ( handles [:: - 1 ], labels [:: - 1 ], bbox_to_anchor = ( 1.3 , 1 ), loc = \"upper right\" ) ax . set_xticks ([ 0 , 60 , 120 ]) ax . set_yticks ([ - 80 , - 20 , 40 ]); As can be seen, the sample from the inferred posterior leads to simulations that closely resemble the observed data, confirming that SNPE did a good job at capturing the observed data in this simple case.","title":"Analysis of the posterior given the observed data"},{"location":"examples/00_HH_simulator/#references","text":"A. L. Hodgkin and A. F. Huxley. A quantitative description of membrane current and its application to conduction and excitation in nerve. The Journal of Physiology, 117(4):500\u2013544, 1952. M. Pospischil, M. Toledo-Rodriguez, C. Monier, Z. Piwkowska, T. Bal, Y. Fr\u00e9gnac, H. Markram, and A. Destexhe. Minimal Hodgkin-Huxley type models for different classes of cortical and thalamic neurons. Biological Cybernetics, 99(4-5), 2008.","title":"References"},{"location":"examples/01_decision_making_model/","text":"SBI for decision-making models \u00b6 In a previous tutorial , we showed how to use SBI with trial-based iid data. Such scenarios can arise, for example, in models of perceptual decision making. In addition to trial-based iid data points, these models often come with mixed data types and varying experimental conditions. Here, we show how sbi can be used to perform inference in such models with the MNLE method. Trial-based SBI with mixed data types \u00b6 In some cases, models with trial-based data additionally return data with mixed data types, e.g., continous and discrete data. For example, most computational models of decision-making have continuous reaction times and discrete choices as output. This can induce a problem when performing trial-based SBI that relies on learning a neural likelihood: It is challenging for most density estimators to handle both, continuous and discrete data at the same time. 
However, there is a recent SBI method for solving this problem, it\u2019s called Mixed Neural Likelihood Estimation (MNLE). It works just like NLE, but with mixed data types. The trick is that it learns two separate density estimators, one for the discrete part of the data, and one for the continuous part, and combines the two to obtain the final neural likelihood. Crucially, the continuous density estimator is trained conditioned on the output of the discrete one, such that statistical dependencies between the discrete and continuous data (e.g., between choices and reaction times) are modeled as well. The interested reader is referred to the original paper available here . MNLE was recently added to sbi (see this PR and also issue ) and follows the same API as SNLE . In this tutorial we will show how to apply MNLE to mixed data, and how to deal with varying experimental conditions. Toy problem for MNLE \u00b6 To illustrate MNLE we set up a toy simulator that outputs mixed data and for which we know the likelihood such we can obtain reference posterior samples via MCMC. Simulator : To simulate mixed data we do the following Sample reaction time from inverse Gamma Sample choices from Binomial Return reaction time \\(rt \\in (0, \\infty)\\) and choice index \\(c \\in \\{0, 1\\}\\) \\[ c \\sim \\text{Binomial}(\\rho) \\\\ rt \\sim \\text{InverseGamma}(\\alpha=2, \\beta) \\\\ \\] Prior : The priors of the two parameters \\(\\rho\\) and \\(\\beta\\) are independent. We define a Beta prior over the probabilty parameter of the Binomial used in the simulator and a Gamma prior over the shape-parameter of the inverse Gamma used in the simulator: \\[ p(\\beta, \\rho) = p(\\beta) \\; p(\\rho) ; \\\\ p(\\beta) = \\text{Gamma}(1, 0.5) \\\\ p(\\text{probs}) = \\text{Beta}(2, 2) \\] Because the InverseGamma and the Binomial likelihoods are well-defined we can perform MCMC on this problem and obtain reference-posterior samples. import matplotlib.pyplot as plt import torch from torch import Tensor from sbi.inference import MNLE from pyro.distributions import InverseGamma from torch.distributions import Beta , Binomial , Categorical , Gamma from sbi.utils import MultipleIndependent from sbi.utils.metrics import c2st from sbi.analysis import pairplot from sbi.inference import MCMCPosterior from sbi.utils.torchutils import atleast_2d from sbi.inference.potentials.likelihood_based_potential import ( MixedLikelihoodBasedPotential , ) from sbi.utils.conditional_density_utils import ConditionedPotential from sbi.utils import mcmc_transform from sbi.inference.potentials.base_potential import BasePotential # Toy simulator for mixed data def mixed_simulator ( theta : Tensor , concentration_scaling : float = 1.0 ): \"\"\"Returns a sample from a mixed distribution given parameters theta. Args: theta: batch of parameters, shape (batch_size, 2) concentration_scaling: scaling factor for the concentration parameter of the InverseGamma distribution, mimics an experimental condition. \"\"\" beta , ps = theta [:, : 1 ], theta [:, 1 :] choices = Binomial ( probs = ps ) . sample () rts = InverseGamma ( concentration = concentration_scaling * torch . ones_like ( beta ), rate = beta ) . sample () return torch . cat (( rts , choices ), dim = 1 ) # The potential function defines the ground truth likelihood and allows us to obtain reference posterior samples via MCMC. 
class PotentialFunctionProvider ( BasePotential ): allow_iid_x = True # type: ignore def __init__ ( self , prior , x_o , concentration_scaling = 1.0 , device = \"cpu\" ): super () . __init__ ( prior , x_o , device ) self . concentration_scaling = concentration_scaling def __call__ ( self , theta , track_gradients : bool = True ): theta = atleast_2d ( theta ) with torch . set_grad_enabled ( track_gradients ): iid_ll = self . iid_likelihood ( theta ) return iid_ll + self . prior . log_prob ( theta ) def iid_likelihood ( self , theta ): lp_choices = torch . stack ( [ Binomial ( probs = th . reshape ( 1 , - 1 )) . log_prob ( self . x_o [:, 1 :]) for th in theta [:, 1 :] ], dim = 1 , ) lp_rts = torch . stack ( [ InverseGamma ( concentration = self . concentration_scaling * torch . ones_like ( beta_i ), rate = beta_i , ) . log_prob ( self . x_o [:, : 1 ]) for beta_i in theta [:, : 1 ] ], dim = 1 , ) joint_likelihood = ( lp_choices + lp_rts ) . squeeze () assert joint_likelihood . shape == torch . Size ([ self . x_o . shape [ 0 ], theta . shape [ 0 ]]) return joint_likelihood . sum ( 0 ) # Define independent prior. prior = MultipleIndependent ( [ Gamma ( torch . tensor ([ 1.0 ]), torch . tensor ([ 0.5 ])), Beta ( torch . tensor ([ 2.0 ]), torch . tensor ([ 2.0 ])), ], validate_args = False , ) Obtain reference-posterior samples via analytical likelihood and MCMC \u00b6 torch . manual_seed ( 42 ) num_trials = 10 num_samples = 1000 theta_o = prior . sample (( 1 ,)) x_o = mixed_simulator ( theta_o . repeat ( num_trials , 1 )) mcmc_kwargs = dict ( num_chains = 20 , warmup_steps = 50 , method = \"slice_np_vectorized\" , init_strategy = \"proposal\" , ) true_posterior = MCMCPosterior ( potential_fn = PotentialFunctionProvider ( prior , x_o ), proposal = prior , theta_transform = mcmc_transform ( prior , enable_transform = True ), ** mcmc_kwargs , ) true_samples = true_posterior . sample (( num_samples ,)) /Users/janbolts/qode/sbi/sbi/utils/sbiutils.py:342: UserWarning: An x with a batch size of 10 was passed. It will be interpreted as a batch of independent and identically distributed data X={x_1, ..., x_n}, i.e., data generated based on the same underlying (unknown) parameter. The resulting posterior will be with respect to entire batch, i.e,. p(theta | X). warnings.warn( Running vectorized MCMC with 20 chains: 0%| | 0/20000 [00:00 1 , you might experience an error that a certain object from your simulator could not be pickled (an example can be found here ). This can be fixed by forcing sbi to pickle with dill instead of the default cloudpickle . To do so, adjust your code as follows: Install dill : pip install dill At the very beginning of your python script, set the pickler to dill : from joblib.externals.loky import set_loky_pickler set_loky_pickler ( \"dill\" ) Move all imports required by your simulator into the simulator: # Imports specified outside of the simulator will break dill: import torch def my_simulator ( parameters ): return torch . ones ( 1 , 10 ) # Therefore, move the imports into the simulator: def my_simulator ( parameters ): import torch return torch . ones ( 1 , 10 ) Alternative: parallelize yourself \u00b6 You can also write your own code to parallelize simulations with whatever multiprocessing framework you prefer. You can then simulate your data outside of sbi and pass the simulated data as shown in the flexible interface : Some more background \u00b6 sbi uses joblib to parallelize simulations, which in turn uses pickle or cloudpickle to serialize the simulator. 
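To make this concrete, here is a minimal round-trip sketch (with a toy stand-in simulator, purely an illustration) of the kind of serialization joblib performs when shipping your simulator to worker processes; a simulator that survives this round trip will usually also work with num_workers > 1.
import cloudpickle
import torch

def toy_simulator(theta):
    # Stand-in for a user simulator; returns noisy parameters.
    return theta + 0.1 * torch.randn_like(theta)

# Serialize and restore the simulator, as joblib workers effectively do.
payload = cloudpickle.dumps(toy_simulator)
restored_simulator = cloudpickle.loads(payload)
print(restored_simulator(torch.zeros(3)).shape)  # torch.Size([3])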
Almost all simulators will be picklable with cloudpickle , but we have experienced issues e.g. with neuron simulators, see here .","title":"When using multiple workers, I get a pickling error. Can I still use multiprocessing?"},{"location":"faq/question_03/#when-using-multiple-workers-i-get-a-pickling-error-can-i-still-use-multiprocessing","text":"Yes, but you will have to make a few adjustments to your code. Some background: when using num_workers > 1 , you might experience an error that a certain object from your simulator could not be pickled (an example can be found here ). This can be fixed by forcing sbi to pickle with dill instead of the default cloudpickle . To do so, adjust your code as follows: Install dill : pip install dill At the very beginning of your python script, set the pickler to dill : from joblib.externals.loky import set_loky_pickler set_loky_pickler ( \"dill\" ) Move all imports required by your simulator into the simulator: # Imports specified outside of the simulator will break dill: import torch def my_simulator ( parameters ): return torch . ones ( 1 , 10 ) # Therefore, move the imports into the simulator: def my_simulator ( parameters ): import torch return torch . ones ( 1 , 10 )","title":"When using multiple workers, I get a pickling error. Can I still use multiprocessing?"},{"location":"faq/question_03/#alternative-parallelize-yourself","text":"You can also write your own code to parallelize simulations with whatever multiprocessing framework you prefer. You can then simulate your data outside of sbi and pass the simulated data as shown in the flexible interface :","title":"Alternative: parallelize yourself"},{"location":"faq/question_03/#some-more-background","text":"sbi uses joblib to parallelize simulations, which in turn uses pickle or cloudpickle to serialize the simulator. Almost all simulators will be picklable with cloudpickle , but we have experienced issues e.g. with neuron simulators, see here .","title":"Some more background"},{"location":"faq/question_04/","text":"Can I use the GPU for training the density estimator? \u00b6 TLDR; Yes, by passing device=\"cuda\" and by passing a prior that lives on the device name your passed. But no speed-ups for default density estimators. Yes. When creating the inference object in the flexible interface, you can pass the device as an argument, e.g., inference = SNPE ( prior , device = \"cuda\" , density_estimator = \"maf\" ) The device is set to \"cpu\" by default, and it can be set to anything, as long as it maps to an existing PyTorch CUDA device. sbi will take care of copying the net and the training data to and from the device . Note that the prior must be on the training device already, e.g., when passing device=\"cuda:0\" , make sure to pass a prior object that was created on that device, e.g., prior = torch.distributions.MultivariateNormal(loc=torch.zeros(2, device=\"cuda:0\"), covariance_matrix=torch.eye(2, device=\"cuda:0\")) . Performance \u00b6 Whether or not you reduce your training time when training on a GPU depends on the problem at hand. We provide a couple of default density estimators for SNPE , SNLE and SNRE , e.g., a mixture density network ( density_estimator=\"mdn\" ) or a Masked Autoregressive Flow ( density_estimator=\"maf\" ). For those default density estimators we do not expect a speed up. This is because the underlying neural networks are quite shallow and not tall, e.g., they do not have many parameters or matrix operations that profit a lot from being executed on the GPU. 
A speed up through training on the GPU will most likely become visible when you are using convolutional modules in your neural networks. E.g., when passing an embedding net for image processing like in this example: https://github.com/sbi-dev/sbi/blob/main/tutorials/05_embedding_net.ipynb .","title":"Can I use the GPU for training the density estimator?"},{"location":"faq/question_04/#can-i-use-the-gpu-for-training-the-density-estimator","text":"TLDR; Yes, by passing device=\"cuda\" and by passing a prior that lives on the device name your passed. But no speed-ups for default density estimators. Yes. When creating the inference object in the flexible interface, you can pass the device as an argument, e.g., inference = SNPE ( prior , device = \"cuda\" , density_estimator = \"maf\" ) The device is set to \"cpu\" by default, and it can be set to anything, as long as it maps to an existing PyTorch CUDA device. sbi will take care of copying the net and the training data to and from the device . Note that the prior must be on the training device already, e.g., when passing device=\"cuda:0\" , make sure to pass a prior object that was created on that device, e.g., prior = torch.distributions.MultivariateNormal(loc=torch.zeros(2, device=\"cuda:0\"), covariance_matrix=torch.eye(2, device=\"cuda:0\")) .","title":"Can I use the GPU for training the density estimator?"},{"location":"faq/question_04/#performance","text":"Whether or not you reduce your training time when training on a GPU depends on the problem at hand. We provide a couple of default density estimators for SNPE , SNLE and SNRE , e.g., a mixture density network ( density_estimator=\"mdn\" ) or a Masked Autoregressive Flow ( density_estimator=\"maf\" ). For those default density estimators we do not expect a speed up. This is because the underlying neural networks are quite shallow and not tall, e.g., they do not have many parameters or matrix operations that profit a lot from being executed on the GPU. A speed up through training on the GPU will most likely become visible when you are using convolutional modules in your neural networks. E.g., when passing an embedding net for image processing like in this example: https://github.com/sbi-dev/sbi/blob/main/tutorials/05_embedding_net.ipynb .","title":"Performance"},{"location":"faq/question_05/","text":"How should I save and load objects in sbi ? \u00b6 NeuralPosterior objects are picklable. import pickle # ... run inference posterior = inference . build_posterior () with open ( \"/path/to/my_posterior.pkl\" , \"wb\" ) as handle : pickle . dump ( posterior , handle ) Note: posterior objects that were saved under sbi v0.17.2 or older can not be loaded under sbi v0.18.0 or newer. Note: if you try to load a posterior that was saved under sbi v0.14.x or earlier under sbi v0.15.x until sbi v0.17.x , you have to add: import sys from sbi.utils import user_input_checks_utils sys . modules [ \"sbi.user_input.user_input_checks_utils\" ] = user_input_checks_utils to your script before loading the posterior. As of sbi v0.18.0 , NeuralInference objects are also picklable. import pickle # ... run inference posterior = inference . build_posterior () with open ( \"/path/to/my_inference.pkl\" , \"wb\" ) as handle : pickle . dump ( inference , handle ) However, saving and loading the inference object will slightly modify the object (in order to make it serializable). These modifications lead to the following two changes in behaviour: 1) Retraining from scratch is not supported, i.e. 
.train(..., retrain_from_scratch=True) does not work. 2) When the loaded object calls the .train() method, it generates a new tensorboard summary writer (instead of appending to the current one).","title":"How should I save and load objects in sbi?"},{"location":"faq/question_05/#how-should-i-save-and-load-objects-in-sbi","text":"NeuralPosterior objects are picklable. import pickle # ... run inference posterior = inference . build_posterior () with open ( \"/path/to/my_posterior.pkl\" , \"wb\" ) as handle : pickle . dump ( posterior , handle ) Note: posterior objects that were saved under sbi v0.17.2 or older can not be loaded under sbi v0.18.0 or newer. Note: if you try to load a posterior that was saved under sbi v0.14.x or earlier under sbi v0.15.x until sbi v0.17.x , you have to add: import sys from sbi.utils import user_input_checks_utils sys . modules [ \"sbi.user_input.user_input_checks_utils\" ] = user_input_checks_utils to your script before loading the posterior. As of sbi v0.18.0 , NeuralInference objects are also picklable. import pickle # ... run inference posterior = inference . build_posterior () with open ( \"/path/to/my_inference.pkl\" , \"wb\" ) as handle : pickle . dump ( inference , handle ) However, saving and loading the inference object will slightly modify the object (in order to make it serializable). These modifications lead to the following two changes in behaviour: 1) Retraining from scratch is not supported, i.e. .train(..., retrain_from_scratch=True) does not work. 2) When the loaded object calls the .train() method, it generates a new tensorboard summary writer (instead of appending to the current one).","title":"How should I save and load objects in sbi?"},{"location":"faq/question_06/","text":"Can I stop neural network training and resume it later? \u00b6 Many clusters have a time limit and sbi might exceed this limit. You can circumvent this problem by using the flexible interface . After simulations are finished, sbi trains a neural network. If this process takes too long, you can stop training and resume it later. The syntax is: inference = SNPE ( prior = prior ) inference = inference . append_simulations ( theta , x ) inference . train ( max_num_epochs = 300 ) # Pick `max_num_epochs` such that it does not exceed the runtime. with open ( \"path/to/my/inference.pkl\" , \"wb\" ) as handle : pickle . dump ( inference , handle ) # To resume training: with open ( \"path/to/my/inference.pkl\" , \"rb\" ) as handle : inference_from_disk = pickle . load ( handle ) inference_from_disk . train ( resume_training = True , max_num_epochs = 600 ) # Run epochs 301 until 600 (or stop early). posterior = inference_from_disk . build_posterior ()","title":"Can I stop neural network training and resume it later?"},{"location":"faq/question_06/#can-i-stop-neural-network-training-and-resume-it-later","text":"Many clusters have a time limit and sbi might exceed this limit. You can circumvent this problem by using the flexible interface . After simulations are finished, sbi trains a neural network. If this process takes too long, you can stop training and resume it later. The syntax is: inference = SNPE ( prior = prior ) inference = inference . append_simulations ( theta , x ) inference . train ( max_num_epochs = 300 ) # Pick `max_num_epochs` such that it does not exceed the runtime. with open ( \"path/to/my/inference.pkl\" , \"wb\" ) as handle : pickle . 
dump ( inference , handle ) # To resume training: with open ( \"path/to/my/inference.pkl\" , \"rb\" ) as handle : inference_from_disk = pickle . load ( handle ) inference_from_disk . train ( resume_training = True , max_num_epochs = 600 ) # Run epochs 301 until 600 (or stop early). posterior = inference_from_disk . build_posterior ()","title":"Can I stop neural network training and resume it later?"},{"location":"faq/question_07/","text":"Can I use a custom prior with sbi? \u00b6 sbi works with torch distributions only so we recommend to use those whenever possible. For example, when you are used to using scipy.stats distributions as priors then we recommend using the corresponding torch.distributions , most common distributions are implemented there. In case you want to use a custom prior that is not in the set of common distributions that\u2019s possible as well: You need to write a prior class that mimicks the behaviour of a torch.distributions.Distribution class. Then sbi will wrap this class to make it a fully functional torch Distribution . Essentially, the class needs two methods: .sample(sample_shape) , where sample_shape is a shape tuple, e.g., (n,) , and returns a batch of n samples, e.g., of shape (n, 2)` for a two dimenional prior. .log_prob(value) method that returns the \u201clog probs\u201d of parameters under the prior, e.g., for a batches of n parameters with shape (n, ndims) it should return a log probs array of shape (n,) . For sbi > 0.17.2 this could look like the following: class CustomUniformPrior : \"\"\"User defined numpy uniform prior. Custom prior with user-defined valid .sample and .log_prob methods. \"\"\" def __init__ ( self , lower : Tensor , upper : Tensor , return_numpy : bool = False ): self . lower = lower self . upper = upper self . dist = BoxUniform ( lower , upper ) self . return_numpy = return_numpy def sample ( self , sample_shape = torch . Size ([])): samples = self . dist . sample ( sample_shape ) return samples . numpy () if self . return_numpy else samples def log_prob ( self , values ): if self . return_numpy : values = torch . as_tensor ( values ) log_probs = self . dist . log_prob ( values ) return log_probs . numpy () if self . return_numpy else log_probs Once you have such a class you can wrap into a Distribution using the process_prior function sbi provides: from sbi.utils import process_prior custom_prior = CustomUniformPrior ( torch . zeros ( 2 ), torch . ones ( 2 )) prior , * _ = process_prior ( custom_prior ) # Keeping only the first return. # use this wrapped prior in sbi... In sbi it is sometimes required to check the support of the prior, e.g., when the prior support is bounded and one wants to reject samples from the posterior density estimator that lie outside the prior support. In torch Distributions this is handled automatically, however, when using a custom prior it is not. Thus, if your prior has bounded support (like the one above) it makes sense to pass the bounds to the wrapper function such that sbi can pass them to torch Distributions : from sbi.utils import process_prior custom_prior = CustomUniformPrior ( torch . zeros ( 2 ), torch . ones ( 2 )) prior = process_prior ( custom_prior , custom_prior_wrapper_kwargs = dict ( lower_bound = torch . zeros ( 2 ), upper_bound = torch . ones ( 2 ))) # use this wrapped prior in sbi... Note that in custom_prior_wrapper_kwargs you can pass additinal arguments for the wrapper, e.g., validate_args or arg_constraints see the Distribution documentation for more details. 
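As a quick sanity check (an illustrative addition, assuming torch and the wrapped prior returned by process_prior above), you can verify that the wrapped object behaves like a batched torch Distribution before using it for inference:
import torch

# The wrapped prior should return batched samples and one log-prob per sample.
samples = prior.sample((100,))
log_probs = prior.log_prob(samples)
assert samples.shape == torch.Size([100, 2])
assert log_probs.shape == torch.Size([100])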
If you are running sbi < 0.17.2 and use SNLE the code above will produce a NotImplementedError (see #581 ). In this case you need to update to a newer version of sbi or use SNPE instead.","title":"Can I use a custom prior with sbi?"},{"location":"faq/question_07/#can-i-use-a-custom-prior-with-sbi","text":"sbi works with torch distributions only so we recommend to use those whenever possible. For example, when you are used to using scipy.stats distributions as priors then we recommend using the corresponding torch.distributions , most common distributions are implemented there. In case you want to use a custom prior that is not in the set of common distributions that\u2019s possible as well: You need to write a prior class that mimicks the behaviour of a torch.distributions.Distribution class. Then sbi will wrap this class to make it a fully functional torch Distribution . Essentially, the class needs two methods: .sample(sample_shape) , where sample_shape is a shape tuple, e.g., (n,) , and returns a batch of n samples, e.g., of shape (n, 2)` for a two dimenional prior. .log_prob(value) method that returns the \u201clog probs\u201d of parameters under the prior, e.g., for a batches of n parameters with shape (n, ndims) it should return a log probs array of shape (n,) . For sbi > 0.17.2 this could look like the following: class CustomUniformPrior : \"\"\"User defined numpy uniform prior. Custom prior with user-defined valid .sample and .log_prob methods. \"\"\" def __init__ ( self , lower : Tensor , upper : Tensor , return_numpy : bool = False ): self . lower = lower self . upper = upper self . dist = BoxUniform ( lower , upper ) self . return_numpy = return_numpy def sample ( self , sample_shape = torch . Size ([])): samples = self . dist . sample ( sample_shape ) return samples . numpy () if self . return_numpy else samples def log_prob ( self , values ): if self . return_numpy : values = torch . as_tensor ( values ) log_probs = self . dist . log_prob ( values ) return log_probs . numpy () if self . return_numpy else log_probs Once you have such a class you can wrap into a Distribution using the process_prior function sbi provides: from sbi.utils import process_prior custom_prior = CustomUniformPrior ( torch . zeros ( 2 ), torch . ones ( 2 )) prior , * _ = process_prior ( custom_prior ) # Keeping only the first return. # use this wrapped prior in sbi... In sbi it is sometimes required to check the support of the prior, e.g., when the prior support is bounded and one wants to reject samples from the posterior density estimator that lie outside the prior support. In torch Distributions this is handled automatically, however, when using a custom prior it is not. Thus, if your prior has bounded support (like the one above) it makes sense to pass the bounds to the wrapper function such that sbi can pass them to torch Distributions : from sbi.utils import process_prior custom_prior = CustomUniformPrior ( torch . zeros ( 2 ), torch . ones ( 2 )) prior = process_prior ( custom_prior , custom_prior_wrapper_kwargs = dict ( lower_bound = torch . zeros ( 2 ), upper_bound = torch . ones ( 2 ))) # use this wrapped prior in sbi... Note that in custom_prior_wrapper_kwargs you can pass additinal arguments for the wrapper, e.g., validate_args or arg_constraints see the Distribution documentation for more details. If you are running sbi < 0.17.2 and use SNLE the code above will produce a NotImplementedError (see #581 ). 
In this case you need to update to a newer version of sbi or use SNPE instead.","title":"Can I use a custom prior with sbi?"},{"location":"tutorial/00_getting_started/","text":"Getting started with sbi \u00b6 Note, you can find the original version of this notebook at https://github.com/sbi-dev/sbi/blob/main/tutorials/00_getting_started.ipynb in the sbi repository. import torch from sbi import utils as utils from sbi import analysis as analysis from sbi.inference.base import infer Running the inference procedure \u00b6 sbi provides a simple interface to run state-of-the-art algorithms for simulation-based inference. For inference, you need to provide two ingredients: 1) a prior distribution that allows to sample parameter sets. 2) a simulator that takes parameter sets and produces simulation outputs. For example, we can have a 3-dimensional parameter space with a uniform prior between [-1,1] and a simple simulator that for the sake of example adds 1.0 and some Gaussian noise to the parameter set: num_dim = 3 prior = utils . BoxUniform ( low =- 2 * torch . ones ( num_dim ), high = 2 * torch . ones ( num_dim )) def simulator ( parameter_set ): return 1.0 + parameter_set + torch . randn ( parameter_set . shape ) * 0.1 sbi can then run inference: # Other methods are \"SNLE\" or \"SNRE\". posterior = infer ( simulator , prior , method = \"SNPE\" , num_simulations = 1000 ) Running 1000 simulations.: 0%| | 0/1000 [00:001 , the posterior is no longer amortized: it will give good results when sampled around x=observation , but possibly bad results for other x . Once we have obtained the posterior, we can .sample() , .log_prob() , or .pairplot() in the same way as for the simple interface. posterior_samples = posterior . sample (( 10000 ,), x = x_o ) # plot posterior samples _ = analysis . pairplot ( posterior_samples , limits = [[ - 2 , 2 ], [ - 2 , 2 ], [ - 2 , 2 ]], figsize = ( 5 , 5 ) ) Drawing 10000 posterior samples: 0%| | 0/10000 [00:001 , the posterior is no longer amortized: it will give good results when sampled around x=observation , but possibly bad results for other x . Once we have obtained the posterior, we can .sample() , .log_prob() , or .pairplot() in the same way as for the simple interface. posterior_samples = posterior . sample (( 10000 ,), x = x_o ) # plot posterior samples _ = analysis . pairplot ( posterior_samples , limits = [[ - 2 , 2 ], [ - 2 , 2 ], [ - 2 , 2 ]], figsize = ( 5 , 5 ) ) Drawing 10000 posterior samples: 0%| | 0/10000 [00:00 The simulator model \u00b6 The simulator model that we consider has two parameters: \\(r\\) and \\(\\theta\\) . On each run, it generates 100 two-dimensional points centered around \\((r \\cos(\\theta), r \\sin(\\theta))\\) and perturbed by a Gaussian noise with variance 0.01. Instead of simply outputting the \\((x,y)\\) coordinates of each data point, the model generates a grayscale image of the scattered points with dimensions 32 by 32. This image is further perturbed by an uniform noise with values betweeen 0 and 0.2. The code below defines such model. def simulator_model ( parameter , return_points = False ): \"\"\"Simulator model with two-dimensional input parameter and 1024-dimensional output This simulator serves as a basic example for using a neural net for learning summary features. It has only two input parameters but generates high-dimensional output vectors. 
The data is generated as follows: (-) Input: parameter = [r, theta] (1) Generate 100 two-dimensional points centered around (r cos(theta),r sin(theta)) and perturbed by a Gaussian noise with variance 0.01 (2) Create a grayscale image I of the scattered points with dimensions 32 by 32 (3) Perturb I with an uniform noise with values betweeen 0 and 0.2 (-) Output: I Parameters ---------- parameter : array-like, shape (2) The two input parameters of the model, ordered as [r, theta] return_points : bool (default: False) Whether the simulator should return the coordinates of the simulated data points as well Returns ------- I: torch tensor, shape (1, 1024) Output flattened image (optional) points: array-like, shape (100, 2) Coordinates of the 2D simulated data points \"\"\" r = parameter [ 0 ] theta = parameter [ 1 ] sigma_points = 0.10 npoints = 100 points = [] for _ in range ( npoints ): x = r * torch . cos ( theta ) + sigma_points * torch . randn ( 1 ) y = r * torch . sin ( theta ) + sigma_points * torch . randn ( 1 ) points . append ([ x , y ]) points = torch . as_tensor ( points ) nx = 32 ny = 32 sigma_image = 0.20 I = torch . zeros ( nx , ny ) for point in points : pi = int (( point [ 0 ] - ( - 1 )) / (( + 1 ) - ( - 1 )) * nx ) pj = int (( point [ 1 ] - ( - 1 )) / (( + 1 ) - ( - 1 )) * ny ) if ( pi < nx ) and ( pj < ny ): I [ pi , pj ] = 1 I = I + sigma_image * torch . rand ( nx , ny ) I = I . T I = I . reshape ( 1 , - 1 ) if return_points : return I , points else : return I The figure below shows an example of the output of the simulator when \\(r = 0.70\\) and \\(\\theta = \\pi/4\\) # simulate samples true_parameter = torch . tensor ([ 0.70 , torch . pi / 4 ]) x_observed , x_points = simulator_model ( true_parameter , return_points = True ) # plot the observation fig , ax = plt . subplots ( facecolor = \"white\" , figsize = ( 11.15 , 5.61 ), ncols = 2 , constrained_layout = True ) circle = plt . Circle (( 0 , 0 ), 1.0 , color = \"k\" , ls = \"--\" , lw = 0.8 , fill = False ) ax [ 0 ] . add_artist ( circle ) ax [ 0 ] . scatter ( x_points [:, 0 ], x_points [:, 1 ], s = 20 ) ax [ 0 ] . set_xlabel ( \"x\" ) ax [ 0 ] . set_ylabel ( \"y\" ) ax [ 0 ] . set_xlim ( - 1 , + 1 ) ax [ 0 ] . set_xticks ([ - 1 , 0.0 , + 1.0 ]) ax [ 0 ] . set_ylim ( - 1 , + 1 ) ax [ 0 ] . set_yticks ([ - 1 , 0.0 , + 1.0 ]) ax [ 0 ] . set_title ( r \"original simulated points with $r = 0.70$ and $\\theta = \\pi/4$\" ) ax [ 1 ] . imshow ( x_observed . view ( 32 , 32 ), origin = \"lower\" , cmap = \"gray\" ) ax [ 1 ] . set_xticks ([]) ax [ 1 ] . set_yticks ([]) ax [ 1 ] . set_title ( \"noisy observed data (gray image with 32 x 32 pixels)\" ) Text(0.5, 1.0, 'noisy observed data (gray image with 32 x 32 pixels)') Defining an embedding_net \u00b6 An inference procedure applied to the output data from this simulator model determines the posterior distribution of \\(r\\) and \\(\\theta\\) given an observation of \\(x\\) , which lives in a 1024 dimensional space (32 x 32 = 1024). To avoid working directly on these high-dimensional vectors, one can use a convolutional neural network (CNN) that takes the 32x32 images as input and encodes them into 8-dimensional feature vectors. This CNN is trained along with the neural density estimator of the inference procedure and serves as an automatic summary statistics extractor. We define and instantiate the CNN as follows: class SummaryNet ( nn . Module ): def __init__ ( self ): super () . __init__ () # 2D convolutional layer self . conv1 = nn . 
Conv2d ( in_channels = 1 , out_channels = 6 , kernel_size = 5 , padding = 2 ) # Maxpool layer that reduces 32x32 image to 4x4 self . pool = nn . MaxPool2d ( kernel_size = 8 , stride = 8 ) # Fully connected layer taking as input the 6 flattened output arrays from the maxpooling layer self . fc = nn . Linear ( in_features = 6 * 4 * 4 , out_features = 8 ) def forward ( self , x ): x = x . view ( - 1 , 1 , 32 , 32 ) x = self . pool ( F . relu ( self . conv1 ( x ))) x = x . view ( - 1 , 6 * 4 * 4 ) x = F . relu ( self . fc ( x )) return x embedding_net = SummaryNet () The inference procedure \u00b6 With the embedding_net defined and instantiated, we can follow the usual workflow of an inference procedure in sbi . The embedding_net object appears as an input argument when instantiating the neural density estimator with utils.posterior_nn . # set prior distribution for the parameters prior = utils . BoxUniform ( low = torch . tensor ([ 0.0 , 0.0 ]), high = torch . tensor ([ 1.0 , 2 * torch . pi ]) ) # make a SBI-wrapper on the simulator object for compatibility simulator_wrapper , prior = prepare_for_sbi ( simulator_model , prior ) # instantiate the neural density estimator neural_posterior = utils . posterior_nn ( model = \"maf\" , embedding_net = embedding_net , hidden_features = 10 , num_transforms = 2 ) # setup the inference procedure with the SNPE-C procedure inference = SNPE ( prior = prior , density_estimator = neural_posterior ) # run the inference procedure on one round and 10000 simulated data points theta , x = simulate_for_sbi ( simulator_wrapper , prior , num_simulations = 10000 ) Running 10000 simulations.: 0%| | 0/10000 [00:00","title":"Learning summary statistics with a neural net"},{"location":"tutorial/05_embedding_net/#the-simulator-model","text":"The simulator model that we consider has two parameters: \\(r\\) and \\(\\theta\\) . On each run, it generates 100 two-dimensional points centered around \\((r \\cos(\\theta), r \\sin(\\theta))\\) and perturbed by a Gaussian noise with variance 0.01. Instead of simply outputting the \\((x,y)\\) coordinates of each data point, the model generates a grayscale image of the scattered points with dimensions 32 by 32. This image is further perturbed by an uniform noise with values betweeen 0 and 0.2. The code below defines such model. def simulator_model ( parameter , return_points = False ): \"\"\"Simulator model with two-dimensional input parameter and 1024-dimensional output This simulator serves as a basic example for using a neural net for learning summary features. It has only two input parameters but generates high-dimensional output vectors. 
The data is generated as follows: (-) Input: parameter = [r, theta] (1) Generate 100 two-dimensional points centered around (r cos(theta),r sin(theta)) and perturbed by a Gaussian noise with variance 0.01 (2) Create a grayscale image I of the scattered points with dimensions 32 by 32 (3) Perturb I with an uniform noise with values betweeen 0 and 0.2 (-) Output: I Parameters ---------- parameter : array-like, shape (2) The two input parameters of the model, ordered as [r, theta] return_points : bool (default: False) Whether the simulator should return the coordinates of the simulated data points as well Returns ------- I: torch tensor, shape (1, 1024) Output flattened image (optional) points: array-like, shape (100, 2) Coordinates of the 2D simulated data points \"\"\" r = parameter [ 0 ] theta = parameter [ 1 ] sigma_points = 0.10 npoints = 100 points = [] for _ in range ( npoints ): x = r * torch . cos ( theta ) + sigma_points * torch . randn ( 1 ) y = r * torch . sin ( theta ) + sigma_points * torch . randn ( 1 ) points . append ([ x , y ]) points = torch . as_tensor ( points ) nx = 32 ny = 32 sigma_image = 0.20 I = torch . zeros ( nx , ny ) for point in points : pi = int (( point [ 0 ] - ( - 1 )) / (( + 1 ) - ( - 1 )) * nx ) pj = int (( point [ 1 ] - ( - 1 )) / (( + 1 ) - ( - 1 )) * ny ) if ( pi < nx ) and ( pj < ny ): I [ pi , pj ] = 1 I = I + sigma_image * torch . rand ( nx , ny ) I = I . T I = I . reshape ( 1 , - 1 ) if return_points : return I , points else : return I The figure below shows an example of the output of the simulator when \\(r = 0.70\\) and \\(\\theta = \\pi/4\\) # simulate samples true_parameter = torch . tensor ([ 0.70 , torch . pi / 4 ]) x_observed , x_points = simulator_model ( true_parameter , return_points = True ) # plot the observation fig , ax = plt . subplots ( facecolor = \"white\" , figsize = ( 11.15 , 5.61 ), ncols = 2 , constrained_layout = True ) circle = plt . Circle (( 0 , 0 ), 1.0 , color = \"k\" , ls = \"--\" , lw = 0.8 , fill = False ) ax [ 0 ] . add_artist ( circle ) ax [ 0 ] . scatter ( x_points [:, 0 ], x_points [:, 1 ], s = 20 ) ax [ 0 ] . set_xlabel ( \"x\" ) ax [ 0 ] . set_ylabel ( \"y\" ) ax [ 0 ] . set_xlim ( - 1 , + 1 ) ax [ 0 ] . set_xticks ([ - 1 , 0.0 , + 1.0 ]) ax [ 0 ] . set_ylim ( - 1 , + 1 ) ax [ 0 ] . set_yticks ([ - 1 , 0.0 , + 1.0 ]) ax [ 0 ] . set_title ( r \"original simulated points with $r = 0.70$ and $\\theta = \\pi/4$\" ) ax [ 1 ] . imshow ( x_observed . view ( 32 , 32 ), origin = \"lower\" , cmap = \"gray\" ) ax [ 1 ] . set_xticks ([]) ax [ 1 ] . set_yticks ([]) ax [ 1 ] . set_title ( \"noisy observed data (gray image with 32 x 32 pixels)\" ) Text(0.5, 1.0, 'noisy observed data (gray image with 32 x 32 pixels)')","title":"The simulator model"},{"location":"tutorial/05_embedding_net/#defining-an-embedding_net","text":"An inference procedure applied to the output data from this simulator model determines the posterior distribution of \\(r\\) and \\(\\theta\\) given an observation of \\(x\\) , which lives in a 1024 dimensional space (32 x 32 = 1024). To avoid working directly on these high-dimensional vectors, one can use a convolutional neural network (CNN) that takes the 32x32 images as input and encodes them into 8-dimensional feature vectors. This CNN is trained along with the neural density estimator of the inference procedure and serves as an automatic summary statistics extractor. We define and instantiate the CNN as follows: class SummaryNet ( nn . Module ): def __init__ ( self ): super () . 
__init__ () # 2D convolutional layer self . conv1 = nn . Conv2d ( in_channels = 1 , out_channels = 6 , kernel_size = 5 , padding = 2 ) # Maxpool layer that reduces 32x32 image to 4x4 self . pool = nn . MaxPool2d ( kernel_size = 8 , stride = 8 ) # Fully connected layer taking as input the 6 flattened output arrays from the maxpooling layer self . fc = nn . Linear ( in_features = 6 * 4 * 4 , out_features = 8 ) def forward ( self , x ): x = x . view ( - 1 , 1 , 32 , 32 ) x = self . pool ( F . relu ( self . conv1 ( x ))) x = x . view ( - 1 , 6 * 4 * 4 ) x = F . relu ( self . fc ( x )) return x embedding_net = SummaryNet ()","title":"Defining an embedding_net"},{"location":"tutorial/05_embedding_net/#the-inference-procedure","text":"With the embedding_net defined and instantiated, we can follow the usual workflow of an inference procedure in sbi . The embedding_net object appears as an input argument when instantiating the neural density estimator with utils.posterior_nn . # set prior distribution for the parameters prior = utils . BoxUniform ( low = torch . tensor ([ 0.0 , 0.0 ]), high = torch . tensor ([ 1.0 , 2 * torch . pi ]) ) # make a SBI-wrapper on the simulator object for compatibility simulator_wrapper , prior = prepare_for_sbi ( simulator_model , prior ) # instantiate the neural density estimator neural_posterior = utils . posterior_nn ( model = \"maf\" , embedding_net = embedding_net , hidden_features = 10 , num_transforms = 2 ) # setup the inference procedure with the SNPE-C procedure inference = SNPE ( prior = prior , density_estimator = neural_posterior ) # run the inference procedure on one round and 10000 simulated data points theta , x = simulate_for_sbi ( simulator_wrapper , prior , num_simulations = 10000 ) Running 10000 simulations.: 0%| | 0/10000 [00:00] 1.3 Summary statistics \u00b6 We will compare two methods for defining summary statistics. One method uses three summary statistics which are function evaluations at three points in time. The other method uses a single summary statistic: the mean squared error between the observed and the simulated trace. In the second case, one then tries to obtain the posterior \\(p(\\theta | 0)\\) , i.e. the error being zero. These two methods are implemented below: \\(\\textbf{get_3_values()}\\) returns 3 function evaluations at \\(x=-0.5, x=0\\) and \\(x=0.75\\) . \\(\\textbf{get_MSE()}\\) returns the mean squared error between true and a quadratic function corresponding to a prior distributions sample. def get_3_values ( theta , seed = None ): \"\"\" Return 3 'y' values corresponding to x=-0.5,0,0.75 as summary statistic vector \"\"\" return np . array ( [ eval ( theta , - 0.5 , seed = seed ), eval ( theta , 0 , seed = seed ), eval ( theta , 0.75 , seed = seed ), ] ) . T def get_MSE ( theta , theta_o , seed = None ): \"\"\" Return the mean-squared error (MSE) i.e. Euclidean distance from the observation function \"\"\" _ , y = create_t_x ( theta_o , seed = seed ) # truth _ , y_ = create_t_x ( theta , seed = seed ) # simulations return np . mean ( np . square ( y_ - y ), axis = 0 , keepdims = True ) . T # MSE Let\u2019s try a couple of samples from our prior and see their summary statistics. Notice that these indeed change in small amounts every time you rerun it due to the noise, except if you set the seed. 1.4 Simulating data \u00b6 Let us see various plots of prior samples and their summary statistics versus the truth, i.e. our artificial observation. t , x_truth = create_t_x ( theta_o ) plt . 
plot ( t , x_truth , \"k\" , zorder = 1 , label = \"truth\" ) n_samples = 100 theta = prior . sample (( n_samples ,)) t , x = create_t_x ( theta . numpy ()) plt . plot ( t , x , \"grey\" , zorder = 0 ) plt . legend () In summary, we defined reasonable summary statistics and, a priori, there might not be an apparent reason why one method would be better than another. When we do inference, we\u2019d like our posterior to focus around parameter samples that have their simulated MSE very close to 0 (i.e. the truth MSE summary statistic) or their 3 extracted \\((t, x)\\) coordinates to be the truthful ones. 1.5 Inference \u00b6 1.5.1 Using the MSE \u00b6 Let\u2019s see if we can use the MSE to recover the true observation parameters \\(\\theta_o=(a_0,b_0,c_0)\\) . theta = prior . sample (( 1000 ,)) x = get_MSE ( theta . numpy (), theta_o ) theta = torch . as_tensor ( theta , dtype = torch . float32 ) x = torch . as_tensor ( x , dtype = torch . float32 ) inference = SNPE ( prior ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () Neural network successfully converged after 181 epochs. Now that we\u2019ve built the posterior, we can see how plausible it considers certain parameters to be, given that we tell it we\u2019ve observed a certain summary statistic (in this case the MSE). We can then sample from it. x_o = torch . as_tensor ( [ [ 0.0 , ] ] ) theta_p = posterior . sample (( 10000 ,), x = x_o ) Drawing 10000 posterior samples: 0%| | 0/10000 [00:00 The functions are a bit closer to the observation than prior samples, but many posterior samples generate activity that is very far off from the observation. We would expect sbi to do better on such a simple example. So what\u2019s going on? Do we need more simulations? Feel free to try, but below we will show that one can use the same number of simulation samples with different summary statistics and do much better. 1.5.2 Using 3 coordinates as summary statistics \u00b6 x = get_3_values ( theta . numpy ()) x = torch . as_tensor ( x , dtype = torch . float32 ) inference = SNPE ( prior ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () Neural network successfully converged after 127 epochs. The observation is now given by the values of the observed trace at three different coordinates: x_o = torch . as_tensor ( get_3_values ( theta_o ), dtype = float ) theta_p = posterior . sample (( 10000 ,), x = x_o ) fig , axes = pairplot ( theta_p , limits = list ( zip ( prior_min , prior_max )), ticks = list ( zip ( prior_min , prior_max )), figsize = ( 7 , 7 ), labels = [ \"a\" , \"b\" , \"c\" ], points_offdiag = { \"markersize\" : 6 }, points_colors = \"r\" , points = theta_o , ); Drawing 10000 posterior samples: 0%| | 0/10000 [00:00 Ok this definitely seems to work! The posterior correctly focuses on the true parameters with greater confidence. You can experiment yourself with how this improves further with more training samples, or try to find out exactly how many you\u2019d need to still obtain a satisfying-looking posterior and posterior-sample simulations that stay close to the observation. So, what\u2019s up with the MSE? Why does it not seem so informative to constrain the posterior? In 1.6, we\u2019ll see both the power and pitfalls of summary statistics.
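Before moving on, a simple posterior-predictive check makes this comparison concrete: simulate traces for a handful of posterior samples and overlay them on the observation. The sketch below is an illustrative addition that assumes the create_t_x helper, theta_o and the posterior samples theta_p from the previous cell.
import matplotlib.pyplot as plt

# Overlay simulations from 50 posterior samples on the observed trace.
t, x_truth = create_t_x(theta_o)
t, x_pred = create_t_x(theta_p[:50].numpy())

plt.plot(t, x_pred, \"grey\", zorder=0)
plt.plot(t, x_truth, \"k\", zorder=1, label=\"observation\")
plt.legend()
plt.show()
With the three-coordinate summary statistics, the grey traces should hug the black observation much more tightly than they did for the MSE-based posterior.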
1.6 Prior simulations\u2019 summary statistics vs observed summary statistics \u00b6 Let\u2019s try to understand this\u2026Let\u2019s look at a histogram of the four summary statistics we\u2019ve experimented with, and see how they compare to our observed truth summary statistic vector: stats = np . concatenate ( ( get_3_values ( theta . numpy ()), get_MSE ( theta . numpy (), theta_o )), axis = 1 ) x_o = np . concatenate (( get_3_values ( theta_o ), np . asarray ([[ 0.0 ]])), axis = 1 ) features = [ \"y @ x=-0.5\" , \"y @ x=0\" , \"y @ x=0.7\" , \"MSE\" ] fig , axes = plt . subplots ( 1 , 4 , figsize = ( 10 , 3 )) xlabelfontsize = 10 for i , ax in enumerate ( axes . reshape ( - 1 )): ax . hist ( stats [:, i ], color = [ \"grey\" ], alpha = 0.5 , bins = 30 , density = True , histtype = \"stepfilled\" , label = [ \"simulations\" ], ) ax . axvline ( x_o [:, i ], label = \"observation\" ) ax . set_xlabel ( features [ i ], fontsize = xlabelfontsize ) if i == 3 : ax . legend () plt . tight_layout () We see that for the coordinates (three plots on the left), simulations cover the observation. That is: it covers it from the left and right side in each case. For the MSE, simulations never truly reach the observation \\(0.0\\) . For the trained neural network, it is strongly preferable if the simulations cover the observation. In that case, the neural network can interpolate between simulated data. Contrary to that, for the MSE, the neural network has to extrapolate : it never observes a simulation that is to the left of the observation and has to extrapolate to the region of MSE= \\(0.0\\) . This seems like a technical point but, as we saw above, it makes a huge difference in performance. 1.7 Explicit recommendations \u00b6 We give some explicit recommendation when using summary statistics Visualize the histogram of each summary statistic and plot the value of the observation. If, for some summary statistics, the observation is not covered (or is at the very border, e.g. the MSE above), the trained neural network will struggle. Do not use an \u201cerror\u201d as summary statistic. This is common in optimization (e.g. genetic algorithms), but it often leads to trouble in sbi due to the reason above. Only use summary statistics that are necessary. The less summary statistics you use, the less can go wrong with them. Of course, you have to ensure that the summary statistics describe the raw data sufficiently well.","title":"Crafting summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#crafting-summary-statistics","text":"Many simulators produce outputs that are high-dimesional. For example, a simulator might generate a time series or an image. In a previous tutorial , we discussed how a neural networks can be used to learn summary statistics from such data. In this notebook, we will instead focus on hand-crafting summary statistics. We demonstrate that the choice of summary statistics can be crucial for the performance of the inference algorithm. import numpy as np import torch import matplotlib.pyplot as plt import matplotlib as mpl # sbi import sbi.utils as utils from sbi.inference.base import infer from sbi.inference import SNPE , prepare_for_sbi , simulate_for_sbi from sbi.utils.get_nn_models import posterior_nn from sbi.analysis import pairplot # remove top and right axis from plots mpl . rcParams [ \"axes.spines.right\" ] = False mpl . rcParams [ \"axes.spines.top\" ] = False This notebook is not intended to provide a one-fits-all approach. 
In fact it argues against this: it argues for the user to carefully construct their summary statistics to (i) further help the user understand his observed data, (ii) help them understand exactly what they want the model to recover from the observation and (iii) help the inference framework itself.","title":"Crafting summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#example-1-the-quadratic-function","text":"Assume we have a simulator that is given by a quadratic function: \\(x(t) = a\\cdot t^2 + b\\cdot t + c + \\epsilon\\) , where \\(\\epsilon\\) is Gaussian observation noise and \\(\\theta = \\{a, b, c\\}\\) are the parameters. Given an observed quadratic function \\(x_o\\) , we would like to recover the posterior over parameters \\(a_o\\) , \\(b_o\\) and \\(c_o\\) .","title":"Example 1: The quadratic function"},{"location":"tutorial/10_crafting_summary_statistics/#11-prior-over-parameters","text":"First we define a prior distribution over parameters \\(a\\) , \\(b\\) and \\(c\\) . Here, we use a uniform prior for \\(a\\) , \\(b\\) and \\(c\\) to go from \\(-1\\) to \\(1\\) . prior_min = [ - 1 , - 1 , - 1 ] prior_max = [ 1 , 1 , 1 ] prior = utils . torchutils . BoxUniform ( low = torch . as_tensor ( prior_min ), high = torch . as_tensor ( prior_max ) )","title":"1.1 Prior over parameters"},{"location":"tutorial/10_crafting_summary_statistics/#12-simulator","text":"Defining some helper functions first: def create_t_x ( theta , seed = None ): \"\"\"Return an t, x array for plotting based on params\"\"\" if theta . ndim == 1 : theta = theta [ np . newaxis , :] if seed is not None : rng = np . random . RandomState ( seed ) else : rng = np . random . RandomState () t = np . linspace ( - 1 , 1 , 200 ) ts = np . repeat ( t [:, np . newaxis ], theta . shape [ 0 ], axis = 1 ) x = ( theta [:, 0 ] * ts ** 2 + theta [:, 1 ] * ts + theta [:, 2 ] + 0.01 * rng . randn ( ts . shape [ 0 ], theta . shape [ 0 ]) ) return t , x def eval ( theta , t , seed = None ): \"\"\"Evaluate the quadratic function at `t`\"\"\" if theta . ndim == 1 : theta = theta [ np . newaxis , :] if seed is not None : rng = np . random . RandomState ( seed ) else : rng = np . random . RandomState () return theta [:, 0 ] * t ** 2 + theta [:, 1 ] * t + theta [:, 2 ] + 0.01 * rng . randn ( 1 ) In this example, we generate the observation \\(x_o\\) from parameters \\(\\theta_o=(a_o, b_o, c_o)=(0.3, -0.2, -0.1)\\) . The observation as follows. theta_o = np . array ([ 0.3 , - 0.2 , - 0.1 ]) t , x = create_t_x ( theta_o ) plt . plot ( t , x , \"k\" ) []","title":"1.2 Simulator"},{"location":"tutorial/10_crafting_summary_statistics/#13-summary-statistics","text":"We will compare two methods for defining summary statistics. One method uses three summary statistics which are function evaluations at three points in time. The other method uses a single summary statistic: the mean squared error between the observed and the simulated trace. In the second case, one then tries to obtain the posterior \\(p(\\theta | 0)\\) , i.e. the error being zero. These two methods are implemented below: \\(\\textbf{get_3_values()}\\) returns 3 function evaluations at \\(x=-0.5, x=0\\) and \\(x=0.75\\) . \\(\\textbf{get_MSE()}\\) returns the mean squared error between true and a quadratic function corresponding to a prior distributions sample. def get_3_values ( theta , seed = None ): \"\"\" Return 3 'y' values corresponding to x=-0.5,0,0.75 as summary statistic vector \"\"\" return np . 
array ( [ eval ( theta , - 0.5 , seed = seed ), eval ( theta , 0 , seed = seed ), eval ( theta , 0.75 , seed = seed ), ] ) . T def get_MSE ( theta , theta_o , seed = None ): \"\"\" Return the mean-squared error (MSE) between the observed and the simulated function \"\"\" _ , y = create_t_x ( theta_o , seed = seed ) # truth _ , y_ = create_t_x ( theta , seed = seed ) # simulations return np . mean ( np . square ( y_ - y ), axis = 0 , keepdims = True ) . T # MSE Let\u2019s try a couple of samples from our prior and see their summary statistics. Notice that these indeed change by small amounts every time you rerun it due to the noise, unless you set the seed.","title":"1.3 Summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#14-simulating-data","text":"Let us see various plots of prior samples and their summary statistics versus the truth, i.e. our artificial observation. t , x_truth = create_t_x ( theta_o ) plt . plot ( t , x_truth , \"k\" , zorder = 1 , label = \"truth\" ) n_samples = 100 theta = prior . sample (( n_samples ,)) t , x = create_t_x ( theta . numpy ()) plt . plot ( t , x , \"grey\" , zorder = 0 ) plt . legend () In summary, we defined reasonable summary statistics and, a priori, there might be no apparent reason why one method would be better than another. When we do inference, we\u2019d like our posterior to focus around parameter samples that have their simulated MSE very close to 0 (i.e. the truth MSE summary statistic) or their 3 extracted \\((t, x)\\) coordinates to be the truthful ones.","title":"1.4 Simulating data"},{"location":"tutorial/10_crafting_summary_statistics/#15-inference","text":"","title":"1.5 Inference"},{"location":"tutorial/10_crafting_summary_statistics/#151-using-the-mse","text":"Let\u2019s see if we can use the MSE to recover the true observation parameters \\(\\theta_o=(a_0,b_0,c_0)\\) . theta = prior . sample (( 1000 ,)) x = get_MSE ( theta . numpy (), theta_o ) theta = torch . as_tensor ( theta , dtype = torch . float32 ) x = torch . as_tensor ( x , dtype = torch . float32 ) inference = SNPE ( prior ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () Neural network successfully converged after 181 epochs. Now that we\u2019ve built the posterior, we can ask how likely it considers certain parameters to be, given that we tell it we\u2019ve observed a certain summary statistic (in this case the MSE). We can then sample from it. x_o = torch . as_tensor ( [ [ 0.0 , ] ] ) theta_p = posterior . sample (( 10000 ,), x = x_o ) Drawing 10000 posterior samples: 0%| | 0/10000 [00:00 The functions are a bit closer to the observation than prior samples, but many posterior samples generate activity that is very far off from the observation. We would expect sbi to do better on such a simple example. So what\u2019s going on? Do we need more simulations? Feel free to try, but below we will show that one can use the same number of simulation samples with different summary statistics and do much better.","title":"1.5.1 Using the MSE"},{"location":"tutorial/10_crafting_summary_statistics/#152-using-3-coordinates-as-summary-statistics","text":"x = get_3_values ( theta . numpy ()) x = torch . as_tensor ( x , dtype = torch . float32 ) inference = SNPE ( prior ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () Neural network successfully converged after 127 epochs. 
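A quick way to make the comparison above concrete is a posterior predictive overlay: simulate quadratic functions from posterior samples and plot them on top of the observed trace. A minimal sketch, assuming `create_t_x`, `theta_o`, and posterior samples `theta_p` as defined in this tutorial (it works the same for the MSE-based and the coordinate-based posterior):

```python
# Posterior predictive overlay (sketch): simulate quadratics from posterior
# samples and compare them with the observed trace. Assumes `create_t_x`,
# `theta_o`, and posterior samples `theta_p` from this tutorial.
t, x_truth = create_t_x(theta_o)
t, x_pred = create_t_x(theta_p[:100].numpy())  # a subset of posterior samples
plt.plot(t, x_pred, "grey", zorder=0)
plt.plot(t, x_truth, "k", zorder=1, label="observation")
plt.legend()
plt.show()
```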
The observation is now given by the values of the observed trace at three different coordinates: x_o = torch . as_tensor ( get_3_values ( theta_o ), dtype = float ) theta_p = posterior . sample (( 10000 ,), x = x_o ) fig , axes = pairplot ( theta_p , limits = list ( zip ( prior_min , prior_max )), ticks = list ( zip ( prior_min , prior_max )), figsize = ( 7 , 7 ), labels = [ \"a\" , \"b\" , \"c\" ], points_offdiag = { \"markersize\" : 6 }, points_colors = \"r\" , points = theta_o , ); Drawing 10000 posterior samples: 0%| | 0/10000 [00:00 Ok this definitely seems to work! The posterior correctly focuses on the true parameters with greater confidence. You can experiment yourself how this improves further with more training samples or you could try to see how many you\u2019d exactly need to keep having a satisfyingly looking posterior and high posterior sample simulations. So, what\u2019s up with the MSE? Why does it not seem so informative to constrain the posterior? In 1.6, we\u2019ll see both the power and pitfalls of summary statistics.","title":"1.5.2 Using 3 coordinates as summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#16-prior-simulations-summary-statistics-vs-observed-summary-statistics","text":"Let\u2019s try to understand this\u2026Let\u2019s look at a histogram of the four summary statistics we\u2019ve experimented with, and see how they compare to our observed truth summary statistic vector: stats = np . concatenate ( ( get_3_values ( theta . numpy ()), get_MSE ( theta . numpy (), theta_o )), axis = 1 ) x_o = np . concatenate (( get_3_values ( theta_o ), np . asarray ([[ 0.0 ]])), axis = 1 ) features = [ \"y @ x=-0.5\" , \"y @ x=0\" , \"y @ x=0.7\" , \"MSE\" ] fig , axes = plt . subplots ( 1 , 4 , figsize = ( 10 , 3 )) xlabelfontsize = 10 for i , ax in enumerate ( axes . reshape ( - 1 )): ax . hist ( stats [:, i ], color = [ \"grey\" ], alpha = 0.5 , bins = 30 , density = True , histtype = \"stepfilled\" , label = [ \"simulations\" ], ) ax . axvline ( x_o [:, i ], label = \"observation\" ) ax . set_xlabel ( features [ i ], fontsize = xlabelfontsize ) if i == 3 : ax . legend () plt . tight_layout () We see that for the coordinates (three plots on the left), simulations cover the observation. That is: it covers it from the left and right side in each case. For the MSE, simulations never truly reach the observation \\(0.0\\) . For the trained neural network, it is strongly preferable if the simulations cover the observation. In that case, the neural network can interpolate between simulated data. Contrary to that, for the MSE, the neural network has to extrapolate : it never observes a simulation that is to the left of the observation and has to extrapolate to the region of MSE= \\(0.0\\) . This seems like a technical point but, as we saw above, it makes a huge difference in performance.","title":"1.6 Prior simulations' summary statistics vs observed summary statistics"},{"location":"tutorial/10_crafting_summary_statistics/#17-explicit-recommendations","text":"We give some explicit recommendation when using summary statistics Visualize the histogram of each summary statistic and plot the value of the observation. If, for some summary statistics, the observation is not covered (or is at the very border, e.g. the MSE above), the trained neural network will struggle. Do not use an \u201cerror\u201d as summary statistic. This is common in optimization (e.g. genetic algorithms), but it often leads to trouble in sbi due to the reason above. 
Only use summary statistics that are necessary. The less summary statistics you use, the less can go wrong with them. Of course, you have to ensure that the summary statistics describe the raw data sufficiently well.","title":"1.7 Explicit recommendations"},{"location":"tutorial/11_sampler_interface/","text":"Sampling algorithms in sbi \u00b6 Note: this tutorial requires that the user is already familiar with the flexible interface . sbi implements three methods: SNPE, SNLE, and SNRE. When using SNPE, the trained neural network directly approximates the posterior. Thus, sampling from the posterior can be done by sampling from the trained neural network. The neural networks trained in SNLE and SNRE approximate the likelihood(-ratio). Thus, in order to draw samples from the posterior, one has to perform additional sampling steps, e.g. Markov-chain Monte-Carlo (MCMC). In sbi , the implemented samplers are: Markov-chain Monte-Carlo (MCMC) Rejection sampling Variational inference (VI) Below, we will demonstrate how these samplers can be used in sbi . First, we train the neural network as always: import torch from sbi.inference import SNLE # dummy Gaussian simulator for demonstration num_dim = 2 prior = torch . distributions . MultivariateNormal ( torch . zeros ( num_dim ), torch . eye ( num_dim )) theta = prior . sample (( 1000 ,)) x = theta + torch . randn (( 1000 , num_dim )) x_o = torch . randn (( 1 , num_dim )) inference = SNLE ( prior = prior , show_progress_bars = False ) likelihood_estimator = inference . append_simulations ( theta , x ) . train () And then we pass the options for which sampling method to use to the build_posterior() method: # Sampling with MCMC sampling_algorithm = \"mcmc\" mcmc_method = \"slice_np\" # or nuts, or hmc posterior = inference . build_posterior ( sample_with = sampling_algorithm , mcmc_method = mcmc_method ) # Sampling with variational inference sampling_algorithm = \"vi\" vi_method = \"rKL\" # or fKL posterior = inference . build_posterior ( sample_with = sampling_algorithm , vi_method = vi_method ) # Unlike other methods, vi needs a training step for every observation. posterior = posterior . set_default_x ( x_o ) . train () # Sampling with rejection sampling sampling_algorithm = \"rejection\" posterior = inference . build_posterior ( sample_with = sampling_algorithm ) More flexibility in adjusting the sampler \u00b6 With the above syntax, you can easily try out different sampling algorithms. However, in many cases, you might want to customize your sampler. Below, we demonstrate how you can change hyperparameters of the samplers (e.g. number of warm-up steps of MCMC) or how you can write your own sampler from scratch. Main syntax (for SNLE and SNRE) \u00b6 As above, we begin by training the neural network as always: import torch from sbi.inference import SNLE # dummy Gaussian simulator for demonstration num_dim = 2 prior = torch . distributions . MultivariateNormal ( torch . zeros ( num_dim ), torch . eye ( num_dim )) theta = prior . sample (( 1000 ,)) x = theta + torch . randn (( 1000 , num_dim )) x_o = torch . randn (( 1 , num_dim )) inference = SNLE ( show_progress_bars = False ) likelihood_estimator = inference . append_simulations ( theta , x ) . train () Neural network successfully converged after 52 epochs. 
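Whichever of the build_posterior() variants shown above you pick, drawing samples afterwards looks the same. A minimal sketch, assuming the posterior object and the observation x_o from this tutorial:

```python
# Sampling works identically for the MCMC-, VI-, and rejection-based
# posteriors returned by `build_posterior()` (assumes `posterior` and `x_o`
# from above).
posterior = posterior.set_default_x(x_o)
samples = posterior.sample((1_000,))
```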
Then, for full flexibility on using the sampler, we do not use the .build_posterior() method, but instead we explicitly define the potential function and the sampling algorithm (see below for explanation): from sbi.inference import likelihood_estimator_based_potential , MCMCPosterior potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform , warmup_steps = 10 ) If you want to use variational inference or rejection sampling, you have to replace the last line with VIPosterior or RejectionPosterior : # For VI, we have to train. posterior = VIPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) . train () posterior = RejectionPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) At this point, you could also plug the potential_fn into any sampler of your choice and not rely on any of the in-built sbi -samplers. Further explanation \u00b6 The first lines are the same as for the flexible interface: inference = SNLE () likelihood_estimator = inference . append_simulations ( theta , x ) . train () Neural network successfully converged after 33 epochs. Next, we obtain the potential function. A potential function is a function of the parameter \\(f(\\theta)\\) . The posterior is proportional to the product of likelihood and prior: \\(p(\\theta | x_o) \\propto p(x_o | \\theta)p(\\theta)\\) . The potential function is the logarithm of the right-hand side of this equation: \\(f(\\theta) = \\log(p(x_o | \\theta)p(\\theta))\\) potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) By calling the potential_fn , you can evaluate the potential: # Assuming that your parameters are 1D. potential = potential_fn ( torch . zeros ( 1 , num_dim ) ) # -> returns f(0) = log( p(x_o|0) p(0) ) The other object that is returned by likelihood_estimator_based_potential is a parameter_transform . The parameter_transform is a pytorch transform . The parameter_transform is a fixed transform that is can be applied to parameter theta . It transforms the parameters into unconstrained space (if the prior is bounded, e.g. BoxUniform ), and standardizes the parameters (i.e. zero mean, one std). Using parameter_transform during sampling is optional, but it usually improves the performance of MCMC. theta_tf = parameter_transform ( torch . zeros ( 1 , num_dim )) theta_original = parameter_transform . inv ( theta_tf ) print ( theta_original ) # -> tensor([[0.0]]) tensor([[0., 0.]]) After having obtained the potential_fn , we can sample from the posterior with MCMC or rejection sampling: from sbi.inference import MCMCPosterior , RejectionPosterior posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) posterior = RejectionPosterior ( potential_fn , proposal = prior ) Main syntax for SNPE \u00b6 SNPE usually does not require MCMC or rejection sampling (if you still need it, you can use the same syntax as above with the posterior_estimator_based_potential function). Instead, SNPE samples from the neural network. If the support of the prior is bounded, some samples can lie outside of the support of the prior. The DirectPosterior class automatically rejects these samples: from sbi.inference import SNPE from sbi.inference import DirectPosterior inference = SNPE () posterior_estimator = inference . 
append_simulations ( theta , x ) . train () posterior = DirectPosterior ( posterior_estimator , prior = prior ) Neural network successfully converged after 57 epochs.","title":"Sampling algorithms in sbi"},{"location":"tutorial/11_sampler_interface/#sampling-algorithms-in-sbi","text":"Note: this tutorial requires that the user is already familiar with the flexible interface . sbi implements three methods: SNPE, SNLE, and SNRE. When using SNPE, the trained neural network directly approximates the posterior. Thus, sampling from the posterior can be done by sampling from the trained neural network. The neural networks trained in SNLE and SNRE approximate the likelihood(-ratio). Thus, in order to draw samples from the posterior, one has to perform additional sampling steps, e.g. Markov-chain Monte-Carlo (MCMC). In sbi , the implemented samplers are: Markov-chain Monte-Carlo (MCMC) Rejection sampling Variational inference (VI) Below, we will demonstrate how these samplers can be used in sbi . First, we train the neural network as always: import torch from sbi.inference import SNLE # dummy Gaussian simulator for demonstration num_dim = 2 prior = torch . distributions . MultivariateNormal ( torch . zeros ( num_dim ), torch . eye ( num_dim )) theta = prior . sample (( 1000 ,)) x = theta + torch . randn (( 1000 , num_dim )) x_o = torch . randn (( 1 , num_dim )) inference = SNLE ( prior = prior , show_progress_bars = False ) likelihood_estimator = inference . append_simulations ( theta , x ) . train () And then we pass the options for which sampling method to use to the build_posterior() method: # Sampling with MCMC sampling_algorithm = \"mcmc\" mcmc_method = \"slice_np\" # or nuts, or hmc posterior = inference . build_posterior ( sample_with = sampling_algorithm , mcmc_method = mcmc_method ) # Sampling with variational inference sampling_algorithm = \"vi\" vi_method = \"rKL\" # or fKL posterior = inference . build_posterior ( sample_with = sampling_algorithm , vi_method = vi_method ) # Unlike other methods, vi needs a training step for every observation. posterior = posterior . set_default_x ( x_o ) . train () # Sampling with rejection sampling sampling_algorithm = \"rejection\" posterior = inference . build_posterior ( sample_with = sampling_algorithm )","title":"Sampling algorithms in sbi"},{"location":"tutorial/11_sampler_interface/#more-flexibility-in-adjusting-the-sampler","text":"With the above syntax, you can easily try out different sampling algorithms. However, in many cases, you might want to customize your sampler. Below, we demonstrate how you can change hyperparameters of the samplers (e.g. number of warm-up steps of MCMC) or how you can write your own sampler from scratch.","title":"More flexibility in adjusting the sampler"},{"location":"tutorial/11_sampler_interface/#main-syntax-for-snle-and-snre","text":"As above, we begin by training the neural network as always: import torch from sbi.inference import SNLE # dummy Gaussian simulator for demonstration num_dim = 2 prior = torch . distributions . MultivariateNormal ( torch . zeros ( num_dim ), torch . eye ( num_dim )) theta = prior . sample (( 1000 ,)) x = theta + torch . randn (( 1000 , num_dim )) x_o = torch . randn (( 1 , num_dim )) inference = SNLE ( show_progress_bars = False ) likelihood_estimator = inference . append_simulations ( theta , x ) . train () Neural network successfully converged after 52 epochs. 
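As mentioned above, the potential function returned by likelihood_estimator_based_potential can also be plugged into a sampler that is not part of sbi. Below is a minimal, hypothetical random-walk Metropolis-Hastings sketch that relies only on evaluating potential_fn; for bounded priors one would additionally run it in the unconstrained space given by parameter_transform:

```python
# Hand-rolled random-walk Metropolis-Hastings (hypothetical sketch, not part
# of sbi). It only assumes that `potential_fn` evaluates
# log(p(x_o | theta) p(theta)) for a batch of parameters, as explained below.
import torch

def metropolis_hastings(potential_fn, num_samples=1_000, num_dim=2, step=0.5):
    theta = torch.zeros(1, num_dim)      # arbitrary starting point
    log_pot = potential_fn(theta)
    samples = []
    for _ in range(num_samples):         # no burn-in or thinning, for brevity
        proposal = theta + step * torch.randn_like(theta)
        log_pot_prop = potential_fn(proposal)
        # Accept with probability min(1, exp(f(proposal) - f(theta))).
        if torch.rand(1) < torch.exp(log_pot_prop - log_pot):
            theta, log_pot = proposal, log_pot_prop
        samples.append(theta.clone())
    return torch.cat(samples, dim=0)

custom_samples = metropolis_hastings(potential_fn)
```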
Then, for full flexibility on using the sampler, we do not use the .build_posterior() method, but instead we explicitly define the potential function and the sampling algorithm (see below for explanation): from sbi.inference import likelihood_estimator_based_potential , MCMCPosterior potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform , warmup_steps = 10 ) If you want to use variational inference or rejection sampling, you have to replace the last line with VIPosterior or RejectionPosterior : # For VI, we have to train. posterior = VIPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) . train () posterior = RejectionPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) At this point, you could also plug the potential_fn into any sampler of your choice and not rely on any of the in-built sbi -samplers.","title":"Main syntax (for SNLE and SNRE)"},{"location":"tutorial/11_sampler_interface/#further-explanation","text":"The first lines are the same as for the flexible interface: inference = SNLE () likelihood_estimator = inference . append_simulations ( theta , x ) . train () Neural network successfully converged after 33 epochs. Next, we obtain the potential function. A potential function is a function of the parameter \\(f(\\theta)\\) . The posterior is proportional to the product of likelihood and prior: \\(p(\\theta | x_o) \\propto p(x_o | \\theta)p(\\theta)\\) . The potential function is the logarithm of the right-hand side of this equation: \\(f(\\theta) = \\log(p(x_o | \\theta)p(\\theta))\\) potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) By calling the potential_fn , you can evaluate the potential: # Assuming that your parameters are 1D. potential = potential_fn ( torch . zeros ( 1 , num_dim ) ) # -> returns f(0) = log( p(x_o|0) p(0) ) The other object that is returned by likelihood_estimator_based_potential is a parameter_transform . The parameter_transform is a pytorch transform . The parameter_transform is a fixed transform that is can be applied to parameter theta . It transforms the parameters into unconstrained space (if the prior is bounded, e.g. BoxUniform ), and standardizes the parameters (i.e. zero mean, one std). Using parameter_transform during sampling is optional, but it usually improves the performance of MCMC. theta_tf = parameter_transform ( torch . zeros ( 1 , num_dim )) theta_original = parameter_transform . inv ( theta_tf ) print ( theta_original ) # -> tensor([[0.0]]) tensor([[0., 0.]]) After having obtained the potential_fn , we can sample from the posterior with MCMC or rejection sampling: from sbi.inference import MCMCPosterior , RejectionPosterior posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform ) posterior = RejectionPosterior ( potential_fn , proposal = prior )","title":"Further explanation"},{"location":"tutorial/11_sampler_interface/#main-syntax-for-snpe","text":"SNPE usually does not require MCMC or rejection sampling (if you still need it, you can use the same syntax as above with the posterior_estimator_based_potential function). Instead, SNPE samples from the neural network. If the support of the prior is bounded, some samples can lie outside of the support of the prior. 
The DirectPosterior class automatically rejects these samples: from sbi.inference import SNPE from sbi.inference import DirectPosterior inference = SNPE () posterior_estimator = inference . append_simulations ( theta , x ) . train () posterior = DirectPosterior ( posterior_estimator , prior = prior ) Neural network successfully converged after 57 epochs.","title":"Main syntax for SNPE"},{"location":"tutorial/12_diagnostics_posterior_predictive_check/","text":"Posterior Predictive Checks (PPC) in SBI \u00b6 A common safety check performed as part of inference is the Posterior Predictive Check (PPC) . A PPC compares data \\(x_{\\text{pp}}\\) generated using the parameters \\(\\theta_{\\text{posterior}}\\) sampled from the posterior with the observed data \\(x_o\\) . The general concept is that, if the inference is correct, the generated data \\(x_{\\text{pp}}\\) should \u201clook similar\u201d to the observed data \\(x_o\\) . Said differently, \\(x_o\\) should be within the support of \\(x_{\\text{pp}}\\) . A PPC usually shouldn\u2019t be used as a validation metric . Nonetheless, a PPC is a good starting point for diagnosing the inference and can provide an intuition about any bias introduced in the inference: does \\(x_{\\text{pp}}\\) systematically differ from \\(x_o\\) ? Main syntax \u00b6 from sbi.analysis import pairplot # A PPC is performed after we trained our neural posterior posterior . set_default_x ( x_o ) # We draw theta samples from the posterior. This part is not in the scope of SBI posterior_samples = posterior . sample (( 5_000 ,)) # We use posterior theta samples to generate x data x_pp = simulator ( posterior_samples ) # We verify if the observed data falls within the support of the generated data _ = pairplot ( samples = x_pp , points = x_o ) Performing a PPC over a toy example \u00b6 Below we provide an example Posterior Predictive Check (PPC) on a toy example: from sbi.analysis import pairplot import torch _ = torch . manual_seed ( 0 ) We work on an inference problem over three parameters using any of the techniques implemented in sbi . In this tutorial, we load the dummy posterior: from toy_posterior_for_07_cc import ExamplePosterior posterior = ExamplePosterior () Let us say that we are observing the data point \\(x_o\\) : D = 5 # simulator output was 5-dimensional x_o = torch . ones ( 1 , D ) posterior . set_default_x ( x_o ) The posterior can be used to draw \\(\\theta_{\\text{posterior}}\\) samples: posterior_samples = posterior . sample (( 5_000 ,)) fig , ax = pairplot ( samples = posterior_samples , limits = torch . tensor ([[ - 2.5 , 2.5 ]] * 3 ), offdiag = [ \"kde\" ], diag = [ \"kde\" ], figsize = ( 5 , 5 ), labels = [ rf \"$\\theta_ { d } $\" for d in range ( 3 )], ) Now we can use our simulator to generate some data \\(x_{\\text{PP}}\\) , using as input parameters the posterior samples \\(\\theta_{\\text{posterior}}\\) . Note that the simulation part is not in the sbi scope, so any simulator (including a non-Python one) can be used at this stage. In our case we\u2019ll use a dummy simulator: def dummy_simulator ( posterior_samples : torch . Tensor , * args , ** kwargs ) -> torch . Tensor : sample_size = posterior_samples . shape [ 0 ] scale = 1.0 shift = torch . distributions . Gumbel ( loc = torch . zeros ( D ), scale = scale / 2 ) . sample () return torch . distributions . Gumbel ( loc = x_o [ 0 ] + shift , scale = scale ) . 
sample ( ( sample_size ,) ) x_pp = dummy_simulator ( posterior_samples ) Plotting \\(x_o\\) against \\(x_{\\text{pp}}\\) , we perform a PPC that plays the role of a sanity check. In this case, the check indicates that \\(x_o\\) falls right within the support of \\(x_{\\text{pp}}\\) , which should make the experimenter rather confident about the estimated posterior : _ = pairplot ( samples = x_pp , points = x_o [ 0 ], limits = torch . tensor ([[ - 2.0 , 5.0 ]] * 5 ), points_colors = \"red\" , figsize = ( 8 , 8 ), offdiag = \"scatter\" , scatter_offdiag = dict ( marker = \".\" , s = 5 ), points_offdiag = dict ( marker = \"+\" , markersize = 20 ), labels = [ rf \"$x_ { d } $\" for d in range ( D )], ) In contrast, \\(x_o\\) falling well outside the support of \\(x_{\\text{pp}}\\) is indicative of a failure to estimate the correct posterior. Here we simulate such a failure mode: error_shift = - 2.0 * torch . ones ( 1 , 5 ) _ = pairplot ( samples = x_pp , points = x_o [ 0 ] + error_shift , limits = torch . tensor ([[ - 2.0 , 5.0 ]] * 5 ), points_colors = \"red\" , figsize = ( 8 , 8 ), offdiag = \"scatter\" , scatter_offdiag = dict ( marker = \".\" , s = 5 ), points_offdiag = dict ( marker = \"+\" , markersize = 20 ), labels = [ rf \"$x_ { d } $\" for d in range ( D )], ) A typical way to investigate this issue would be to run a prior predictive check , applying the same plotting strategy, but drawing \\(\\theta\\) from the prior instead of the posterior. The support for \\(x_{\\text{pp}}\\) should be larger and should contain \\(x_o\\) . If this check is successful, the \u201cblame\u201d can then be shifted to the inference (method used, convergence of density estimators, number of sequential rounds, etc\u2026).","title":"Posterior predictive checks"},{"location":"tutorial/12_diagnostics_posterior_predictive_check/#posterior-predictive-checks-ppc-in-sbi","text":"A common safety check performed as part of inference is the Posterior Predictive Check (PPC) . A PPC compares data \\(x_{\\text{pp}}\\) generated using the parameters \\(\\theta_{\\text{posterior}}\\) sampled from the posterior with the observed data \\(x_o\\) . The general concept is that, if the inference is correct, the generated data \\(x_{\\text{pp}}\\) should \u201clook similar\u201d to the observed data \\(x_o\\) . Said differently, \\(x_o\\) should be within the support of \\(x_{\\text{pp}}\\) . A PPC usually shouldn\u2019t be used as a validation metric . Nonetheless, a PPC is a good starting point for diagnosing the inference and can provide an intuition about any bias introduced in the inference: does \\(x_{\\text{pp}}\\) systematically differ from \\(x_o\\) ?","title":"Posterior Predictive Checks (PPC) in SBI"},{"location":"tutorial/12_diagnostics_posterior_predictive_check/#main-syntax","text":"from sbi.analysis import pairplot # A PPC is performed after we trained our neural posterior posterior . set_default_x ( x_o ) # We draw theta samples from the posterior. This part is not in the scope of SBI posterior_samples = posterior . 
sample (( 5_000 ,)) # We use posterior theta samples to generate x data x_pp = simulator ( posterior_samples ) # We verify if the observed data falls within the support of the generated data _ = pairplot ( samples = x_pp , points = x_o )","title":"Main syntax"},{"location":"tutorial/12_diagnostics_posterior_predictive_check/#performing-a-ppc-over-a-toy-example","text":"Below we provide an example Posterior Predictive Check (PPC) over some toy example: from sbi.analysis import pairplot import torch _ = torch . manual_seed ( 0 ) We work on an inference problem over three parameters using any of the techniques implemented in sbi . In this tutorial, we load the dummy posterior: from toy_posterior_for_07_cc import ExamplePosterior posterior = ExamplePosterior () Let us say that we are observing the data point \\(x_o\\) : D = 5 # simulator output was 5-dimensional x_o = torch . ones ( 1 , D ) posterior . set_default_x ( x_o ) The posterior can be used to draw \\(\\theta_{\\text{posterior}}\\) samples: posterior_samples = posterior . sample (( 5_000 ,)) fig , ax = pairplot ( samples = posterior_samples , limits = torch . tensor ([[ - 2.5 , 2.5 ]] * 3 ), offdiag = [ \"kde\" ], diag = [ \"kde\" ], figsize = ( 5 , 5 ), labels = [ rf \"$\\theta_ { d } $\" for d in range ( 3 )], ) Now we can use our simulator to generate some data \\(x_{\\text{PP}}\\) , using as input parameters the poterior samples \\(\\theta_{\\text{posterior}}\\) . Note that the simulation part is not in the sbi scope, so any simulator -including a non-Python one- can be used at this stage. In our case we\u2019ll use a dummy simulator: def dummy_simulator ( posterior_samples : torch . Tensor , * args , ** kwargs ) -> torch . Tensor : sample_size = posterior_samples . shape [ 0 ] scale = 1.0 shift = torch . distributions . Gumbel ( loc = torch . zeros ( D ), scale = scale / 2 ) . sample () return torch . distributions . Gumbel ( loc = x_o [ 0 ] + shift , scale = scale ) . sample ( ( sample_size ,) ) x_pp = dummy_simulator ( posterior_samples ) Plotting \\(x_o\\) against the \\(x_{\\text{pp}}\\) , we perform a PPC that plays the role of a sanity check. In this case, the check indicates that \\(x_o\\) falls right within the support of \\(x_{\\text{pp}}\\) , which should make the experimenter rather confident about the estimated posterior : _ = pairplot ( samples = x_pp , points = x_o [ 0 ], limits = torch . tensor ([[ - 2.0 , 5.0 ]] * 5 ), points_colors = \"red\" , figsize = ( 8 , 8 ), offdiag = \"scatter\" , scatter_offdiag = dict ( marker = \".\" , s = 5 ), points_offdiag = dict ( marker = \"+\" , markersize = 20 ), labels = [ rf \"$x_ { d } $\" for d in range ( D )], ) In contrast, \\(x_o\\) falling well outside the support of \\(x_{\\text{pp}}\\) is indicative of a failure to estimate the correct posterior. Here we simulate such a failure mode: error_shift = - 2.0 * torch . ones ( 1 , 5 ) _ = pairplot ( samples = x_pp , points = x_o [ 0 ] + error_shift , limits = torch . tensor ([[ - 2.0 , 5.0 ]] * 5 ), points_colors = \"red\" , figsize = ( 8 , 8 ), offdiag = \"scatter\" , scatter_offdiag = dict ( marker = \".\" , s = 5 ), points_offdiag = dict ( marker = \"+\" , markersize = 20 ), labels = [ rf \"$x_ { d } $\" for d in range ( D )], ) A typical way to investigate this issue would be to run a prior* predictive check , applying the same plotting strategy, but drawing \\(\\theta\\) from the prior instead of the posterior. **The support for \\(x_{\\text{pp}}\\) should be larger and should contain \\(x_o\\) * . 
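The prior predictive check just described can be sketched with the same plotting strategy. Note that prior below is a hypothetical prior object with a .sample() method; it is not defined in this toy example, while dummy_simulator, x_o, D, and pairplot are taken from above:

```python
# Prior predictive check (sketch): draw parameters from the prior instead of
# the posterior, simulate, and verify that x_o falls within the (wider)
# support. `prior` is a hypothetical stand-in; the rest is defined above.
prior_samples = prior.sample((5_000,))
x_prior_pred = dummy_simulator(prior_samples)
_ = pairplot(
    samples=x_prior_pred,
    points=x_o[0],
    limits=torch.tensor([[-2.0, 5.0]] * 5),
    points_colors="red",
    figsize=(8, 8),
    offdiag="scatter",
    scatter_offdiag=dict(marker=".", s=5),
    points_offdiag=dict(marker="+", markersize=20),
    labels=[rf"$x_{d}$" for d in range(D)],
)
```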
If this check is successful, the \u201cblame\u201d can then be shifted to the inference (method used, convergence of density estimators, number of sequential rounds, etc\u2026).","title":"Performing a PPC over a toy example"},{"location":"tutorial/13_diagnostics_simulation_based_calibration/","text":"Simulation-based Calibration in SBI \u00b6 After a density estimator has been trained with simulated data to obtain a posterior, the estimator should be made subject to several diagnostic tests, before being used for inference given the actual observed data. Posterior Predictive Checks (see tutorial 12) provide one way to \u201ccritique\u201d a trained estimator via its predictive performance. Another important approach to such diagnostics is simulation-based calibration as reported by Talts et al, 2018 . Simulation-based calibration (SBC) provides a (qualitative) view and a quantitive measure to check, whether the uncertainties of the posterior are balanced, i.e., neither over-confident nor under-confident. As such, SBC can be viewed as a necessary condition (but not sufficient) for a valid inference algorithm: If SBC checks fail, this tells you that your inference is invalid. If SBC checks pass, this is no guarantee that the posterior estimation is working. In a nutshell \u00b6 To run SBC, we sample theta_o_i values from the prior of the problem at hand we simulate \u201cobservations\u201d from these parameters: x_o_i = simulator(theta_o_i) we perform inference given each observation x_o_i . This produces a separate posterior \\(p_i(\\theta | x_{o,i})\\) for each of x_o_i . The key step for SBC is to generate a set of posterior samples \\(\\{\\theta\\}_i\\) from each posterior (let\u2019s call this theta_i_s , referring to s samples from posterior \\(p_i(\\theta | x_{o,i})\\) ), and to rank the corresponding theta_o_i under this set of samples. A rank is computed by counting how many samples theta_i_s fall below their corresponding theta_o_i (see section 4.1 in Talts et al.). These ranks are then used to perform the SBC check. Key ideas behind SBC \u00b6 The core idea behind SBC is two fold: SBC ranks of ground truth parameters under the inferred posterior samples follow a uniform distribution. (If the SBC ranks are not uniformly distributed, the posterior is not well calibrated.) samples from the data averaged posterior (ensemble of randomly chosen posterior samples given multiple distinct observations x_o ) are distributed according to the prior What can SBC diagnose? \u00b6 SBC can inform us whether we are not wrong. However, it cannot tell us whether we are right, i.e., SBC checks a necessary condition. For example, imagine you run SBC using the prior as a posterior. The ranks would be perfectly uniform. But the inference would be wrong. The Posterior Predictive Checks (see tutorial 12) can be seen as the complementary sufficient check for the posterior (only as a methaphor, no theoretical guarantees here). Using the prior as a posterior and then doing predictive checks would clearly show that inference failed. To summarize SBC can: tell us whether the SBI method applied to the problem at hand produces posteriors that have well-calibrated uncertainties, and if not, what kind of systematic bias it has: negative or positive bias (shift in the mean of the predictions) or over- or underdispersion (too large or too small variance) A healthy posterior \u00b6 Let\u2019s take the gaussian linear simulator from the previous tutorials and run inference with NPE on it. 
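The nutshell recipe above maps almost one-to-one onto the helpers run_sbc, check_sbc, and sbc_rank_plot that are imported in the next cell. A hedged sketch of how they are typically strung together, assuming a trained posterior as well as the prior and simulator of this tutorial:

```python
# SBC in a few lines (sketch): sample ground-truth parameters from the prior,
# simulate observations, let sbi compute the ranks, then inspect them.
# Assumes a trained `posterior`, plus `prior` and `simulator` from this tutorial.
num_sbc_runs, num_posterior_samples = 300, 1_000
thetas = prior.sample((num_sbc_runs,))
xs = simulator(thetas)
ranks, dap_samples = run_sbc(thetas, xs, posterior, num_posterior_samples=num_posterior_samples)
check_stats = check_sbc(ranks, thetas, dap_samples, num_posterior_samples=num_posterior_samples)
fig, ax = sbc_rank_plot(ranks, num_posterior_samples=num_posterior_samples, plot_type="hist")
```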
Note: SBC requires running inference several times. Using SBC with amortized methods like NPE is hence a justified endavour: repeated inference is cheap and SBC can be performed with little runtime penalty. This does not hold for sequential methods or anything relying on MCMC or VI (here, parallelization is your friend, num_workers>1 ). import torch _ = torch . manual_seed ( 10 ) from torch import eye , ones , zeros from torch.distributions import MultivariateNormal from sbi.analysis import check_sbc , run_sbc , get_nltp , sbc_rank_plot from sbi.inference import SNPE , SNPE_C , prepare_for_sbi , simulate_for_sbi from sbi.simulators import linear_gaussian , diagonal_linear_gaussian num_dim = 2 num_simulations = 5_000 prior_mean = ones ( num_dim ) prior_cov = 2 * eye ( num_dim ) prior = MultivariateNormal ( loc = prior_mean , covariance_matrix = prior_cov , validate_args = False ) An ideal case \u00b6 To explore SBC, we make our life easy and assume that we deal with a problem where the likelihood is modelled by an identity mapping and a bit of smear. But to start, we only use an almost vanishing smear of 0.01 . default_likelihood_loc = 0.0 # let's start with 0 shift default_likelihood_scale = 0.01 # let's smear theta only by a little bit def simulator ( theta , loc = default_likelihood_loc , scale = default_likelihood_scale ): \"\"\"linear gaussian inspired by sbibm https://github.com/sbi-benchmark/sbibm/blob/15f068a08a938383116ffd92b92de50c580810a3/sbibm/tasks/gaussian_linear/task.py#L74 \"\"\" num_dim = theta . shape [ - 1 ] cov_ = scale * eye ( num_dim ) # always positively semi-definite # using validate_args=False disables sanity checks on `covariance_matrix` # for the sake of speed value = MultivariateNormal ( loc = ( theta + loc ), covariance_matrix = cov_ , validate_args = False ) . sample () return value theta , x = simulate_for_sbi ( simulator , prior , num_simulations ) Running 5000 simulations.: 0%| | 0/5000 [00:001 ). import torch _ = torch . manual_seed ( 10 ) from torch import eye , ones , zeros from torch.distributions import MultivariateNormal from sbi.analysis import check_sbc , run_sbc , get_nltp , sbc_rank_plot from sbi.inference import SNPE , SNPE_C , prepare_for_sbi , simulate_for_sbi from sbi.simulators import linear_gaussian , diagonal_linear_gaussian num_dim = 2 num_simulations = 5_000 prior_mean = ones ( num_dim ) prior_cov = 2 * eye ( num_dim ) prior = MultivariateNormal ( loc = prior_mean , covariance_matrix = prior_cov , validate_args = False )","title":"A healthy posterior"},{"location":"tutorial/13_diagnostics_simulation_based_calibration/#an-ideal-case","text":"To explore SBC, we make our life easy and assume that we deal with a problem where the likelihood is modelled by an identity mapping and a bit of smear. But to start, we only use an almost vanishing smear of 0.01 . default_likelihood_loc = 0.0 # let's start with 0 shift default_likelihood_scale = 0.01 # let's smear theta only by a little bit def simulator ( theta , loc = default_likelihood_loc , scale = default_likelihood_scale ): \"\"\"linear gaussian inspired by sbibm https://github.com/sbi-benchmark/sbibm/blob/15f068a08a938383116ffd92b92de50c580810a3/sbibm/tasks/gaussian_linear/task.py#L74 \"\"\" num_dim = theta . shape [ - 1 ] cov_ = scale * eye ( num_dim ) # always positively semi-definite # using validate_args=False disables sanity checks on `covariance_matrix` # for the sake of speed value = MultivariateNormal ( loc = ( theta + loc ), covariance_matrix = cov_ , validate_args = False ) . 
sample () return value theta , x = simulate_for_sbi ( simulator , prior , num_simulations ) Running 5000 simulations.: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter. IID inference with NLE \u00b6 (S)NLE and (S)NRE can perform inference given multiple IID obserations by using only single-trial training data (i.e., for training, we run the simulator only once per parameter set). Once the likelihood is learned on single trials (i.e., a neural network that predicts the likelihood of a single observation given a parameter set), one can sample the posterior for any number of trials. This works because, given a single-trial neural likelihood from (S)NLE or (S)NRE, we can calculate the joint likelihoods of all trials by multiplying them together (or adding them in log-space). The joint likelihood can then be plugged into MCMC or VI . sbi takes care of all of these steps, so you do not have to implement anything yourself: # Train SNLE. inferer = SNLE ( prior , show_progress_bars = True , density_estimator = \"mdn\" ) theta , x = simulate_for_sbi ( simulator , prior , 10000 , simulation_batch_size = 1000 ) inferer . append_simulations ( theta , x ) . train ( training_batch_size = 1000 ); Running 10000 simulations.: 0%| | 0/10000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); The pairplot above already indicates that (S)NLE is well able to obtain accurate posterior samples also for increasing number of trials (note that we trained the single-round version of SNLE so that we did not have to re-train it for new \\(x_o\\) ). Quantitatively we can measure the accuracy of SNLE by calculating the c2st score between SNLE and the true posterior samples, where the best accuracy is perfect for 0.5 : cs = [ c2st ( torch . from_numpy ( s1 ), torch . from_numpy ( s2 )) for s1 , s2 in zip ( true_samples , nle_samples ) ] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) c2st score for num_trials=1: 0.50 c2st score for num_trials=5: 0.51 c2st score for num_trials=15: 0.51 c2st score for num_trials=20: 0.51 IID inference with NPE using permutation-invariant embedding nets \u00b6 For NPE we need to define an embedding net that handles the set-like structure of iid-data, i.e., that it permutation invariant and can handle different number of trials. We implemented several embedding net classes that allow to construct such a permutation- and number-of-trials invariant embedding net. To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021). To become invariant w.r.t. the number-of-trials, we train the net with varying number of trials for each parameter setting. This means that, unlike for (S)NLE and (S)NRE, (S)NPE requires to run the simulator multiple times for individual parameter sets to generate the training data. In order to implement this in sbi , \u201cunobserved\u201d trials in the training dataset have to be masked by NaNs (and ignore the resulting SBI warning about NaNs in the training data). Construct training data set. \u00b6 # we need to fix the maximum number of trials. 
max_num_trials = 20 # construct training data set: we want to cover the full range of possible number of # trials num_training_samples = 5000 theta = prior . sample (( num_training_samples ,)) # there are certainly smarter ways to construct the training data set, but we go with a # for loop here for illustration purposes. x = torch . ones ( num_training_samples * max_num_trials , max_num_trials , x_dim ) * float ( \"nan\" ) for i in range ( num_training_samples ): xi = simulator ( theta [ i ] . repeat ( max_num_trials , 1 )) for j in range ( max_num_trials ): x [ i * max_num_trials + j , : j + 1 , :] = xi [: j + 1 , :] theta = theta . repeat_interleave ( max_num_trials , dim = 0 ) Build embedding net \u00b6 from sbi.neural_nets.embedding_nets import ( FCEmbedding , PermutationInvariantEmbedding , ) from sbi.utils import posterior_nn # embedding latent_dim = 10 single_trial_net = FCEmbedding ( input_dim = theta_dim , num_hiddens = 40 , num_layers = 2 , output_dim = latent_dim , ) embedding_net = PermutationInvariantEmbedding ( single_trial_net , trial_net_output_dim = latent_dim , # NOTE: post-embedding is not needed really. num_layers = 1 , num_hiddens = 10 , output_dim = 10 , ) # we choose a simple MDN as the density estimator. # NOTE: we turn off z-scoring of the data, as we used NaNs for the missing trials. density_estimator = posterior_nn ( \"mdn\" , embedding_net = embedding_net , z_score_x = \"none\" ) Run training \u00b6 inference = SNPE ( prior , density_estimator = density_estimator ) # NOTE: we don't exclude invalid x because we used NaNs for the missing trials. inference . append_simulations ( theta , x , exclude_invalid_x = False , ) . train ( training_batch_size = 1000 ) posterior = inference . build_posterior () WARNING:root:Found 95000 NaN simulations and 0 Inf simulations. They are not excluded from training due to `exclude_invalid_x=False`.Training will likely fail, we strongly recommend `exclude_invalid_x=True` for Single-round NPE. Neural network successfully converged after 168 epochs. Amortized inference \u00b6 Comparing runtimes, we see that the NPE training takes a bit longer than the training on single trials for NLE above. However, we trained the density estimator such that it can handle multiple and changing number of iid trials (up to 20). Thus, we can obtain posterior samples for different x_o with just a single forward pass instead of having to run MCMC for each new observation. As you can see below, the c2st score for increasing number of observed trials remains close to the ideal 0.5 . npe_samples = [] for xo in xos : # we need to pad the x_os with NaNs to match the shape of the training data. xoi = torch . ones ( 1 , max_num_trials , x_dim ) * float ( \"nan\" ) xoi [ 0 , : len ( xo ), :] = xo npe_samples . append ( posterior . sample ( sample_shape = ( num_samples ,), x = xoi )) cs = [ c2st ( torch . from_numpy ( s1 ), s2 ) for s1 , s2 in zip ( true_samples , npe_samples )] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . 
item () : .2f } \" ) Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter.","title":"The analytical posterior concentrates around true parameters with increasing number of IID trials"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#iid-inference-with-nle","text":"(S)NLE and (S)NRE can perform inference given multiple IID obserations by using only single-trial training data (i.e., for training, we run the simulator only once per parameter set). Once the likelihood is learned on single trials (i.e., a neural network that predicts the likelihood of a single observation given a parameter set), one can sample the posterior for any number of trials. This works because, given a single-trial neural likelihood from (S)NLE or (S)NRE, we can calculate the joint likelihoods of all trials by multiplying them together (or adding them in log-space). The joint likelihood can then be plugged into MCMC or VI . sbi takes care of all of these steps, so you do not have to implement anything yourself: # Train SNLE. inferer = SNLE ( prior , show_progress_bars = True , density_estimator = \"mdn\" ) theta , x = simulate_for_sbi ( simulator , prior , 10000 , simulation_batch_size = 1000 ) inferer . append_simulations ( theta , x ) . train ( training_batch_size = 1000 ); Running 10000 simulations.: 0%| | 0/10000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); The pairplot above already indicates that (S)NLE is well able to obtain accurate posterior samples also for increasing number of trials (note that we trained the single-round version of SNLE so that we did not have to re-train it for new \\(x_o\\) ). Quantitatively we can measure the accuracy of SNLE by calculating the c2st score between SNLE and the true posterior samples, where the best accuracy is perfect for 0.5 : cs = [ c2st ( torch . from_numpy ( s1 ), torch . from_numpy ( s2 )) for s1 , s2 in zip ( true_samples , nle_samples ) ] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) c2st score for num_trials=1: 0.50 c2st score for num_trials=5: 0.51 c2st score for num_trials=15: 0.51 c2st score for num_trials=20: 0.51","title":"IID inference with NLE"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#iid-inference-with-npe-using-permutation-invariant-embedding-nets","text":"For NPE we need to define an embedding net that handles the set-like structure of iid-data, i.e., that it permutation invariant and can handle different number of trials. We implemented several embedding net classes that allow to construct such a permutation- and number-of-trials invariant embedding net. To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021). 
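To make the permutation-invariance idea concrete before turning to invariance w.r.t. the number of trials (next paragraph), here is a toy mean-pooling embedding in plain PyTorch. It is only a conceptual sketch; the tutorial itself uses sbi's FCEmbedding and PermutationInvariantEmbedding classes shown below:

```python
import torch
import torch.nn as nn

class ToyPermutationInvariantNet(nn.Module):
    """Conceptual sketch (not sbi's implementation): embed each trial
    separately, then aggregate with a mean, which is permutation invariant."""

    def __init__(self, trial_dim: int, latent_dim: int = 10):
        super().__init__()
        self.single_trial_net = nn.Sequential(
            nn.Linear(trial_dim, 40), nn.ReLU(), nn.Linear(40, latent_dim)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # x has shape (batch, num_trials, trial_dim).
        embeddings = self.single_trial_net(x)  # (batch, num_trials, latent_dim)
        return embeddings.mean(dim=1)          # mean over trials: order does not matter

# Shuffling the trial dimension leaves the embedding (numerically) unchanged.
net = ToyPermutationInvariantNet(trial_dim=2)
x = torch.randn(1, 5, 2)
perm = torch.randperm(5)
assert torch.allclose(net(x), net(x[:, perm, :]), atol=1e-6)
```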
To become invariant w.r.t. the number-of-trials, we train the net with varying number of trials for each parameter setting. This means that, unlike for (S)NLE and (S)NRE, (S)NPE requires to run the simulator multiple times for individual parameter sets to generate the training data. In order to implement this in sbi , \u201cunobserved\u201d trials in the training dataset have to be masked by NaNs (and ignore the resulting SBI warning about NaNs in the training data).","title":"IID inference with NPE using permutation-invariant embedding nets"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#construct-training-data-set","text":"# we need to fix the maximum number of trials. max_num_trials = 20 # construct training data set: we want to cover the full range of possible number of # trials num_training_samples = 5000 theta = prior . sample (( num_training_samples ,)) # there are certainly smarter ways to construct the training data set, but we go with a # for loop here for illustration purposes. x = torch . ones ( num_training_samples * max_num_trials , max_num_trials , x_dim ) * float ( \"nan\" ) for i in range ( num_training_samples ): xi = simulator ( theta [ i ] . repeat ( max_num_trials , 1 )) for j in range ( max_num_trials ): x [ i * max_num_trials + j , : j + 1 , :] = xi [: j + 1 , :] theta = theta . repeat_interleave ( max_num_trials , dim = 0 )","title":"Construct training data set."},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#build-embedding-net","text":"from sbi.neural_nets.embedding_nets import ( FCEmbedding , PermutationInvariantEmbedding , ) from sbi.utils import posterior_nn # embedding latent_dim = 10 single_trial_net = FCEmbedding ( input_dim = theta_dim , num_hiddens = 40 , num_layers = 2 , output_dim = latent_dim , ) embedding_net = PermutationInvariantEmbedding ( single_trial_net , trial_net_output_dim = latent_dim , # NOTE: post-embedding is not needed really. num_layers = 1 , num_hiddens = 10 , output_dim = 10 , ) # we choose a simple MDN as the density estimator. # NOTE: we turn off z-scoring of the data, as we used NaNs for the missing trials. density_estimator = posterior_nn ( \"mdn\" , embedding_net = embedding_net , z_score_x = \"none\" )","title":"Build embedding net"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#run-training","text":"inference = SNPE ( prior , density_estimator = density_estimator ) # NOTE: we don't exclude invalid x because we used NaNs for the missing trials. inference . append_simulations ( theta , x , exclude_invalid_x = False , ) . train ( training_batch_size = 1000 ) posterior = inference . build_posterior () WARNING:root:Found 95000 NaN simulations and 0 Inf simulations. They are not excluded from training due to `exclude_invalid_x=False`.Training will likely fail, we strongly recommend `exclude_invalid_x=True` for Single-round NPE. Neural network successfully converged after 168 epochs.","title":"Run training"},{"location":"tutorial/14_iid_data_and_permutation_invariant_embeddings/#amortized-inference","text":"Comparing runtimes, we see that the NPE training takes a bit longer than the training on single trials for NLE above. However, we trained the density estimator such that it can handle multiple and changing number of iid trials (up to 20). Thus, we can obtain posterior samples for different x_o with just a single forward pass instead of having to run MCMC for each new observation. 
As you can see below, the c2st score for increasing number of observed trials remains close to the ideal 0.5 . npe_samples = [] for xo in xos : # we need to pad the x_os with NaNs to match the shape of the training data. xoi = torch . ones ( 1 , max_num_trials , x_dim ) * float ( \"nan\" ) xoi [ 0 , : len ( xo ), :] = xo npe_samples . append ( posterior . sample ( sample_shape = ( num_samples ,), x = xoi )) cs = [ c2st ( torch . from_numpy ( s1 ), s2 ) for s1 , s2 in zip ( true_samples , npe_samples )] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter. IID inference with NLE \u00b6 (S)NLE can easily perform inference given multiple IID x because it is based on learning the likelihood. Once the likelihood is learned on single trials, i.e., a neural network that given a single observation and a parameter predicts the likelihood of that observation given the parameter, one can perform MCMC to obtain posterior samples. MCMC relies on evaluating ratios of likelihoods of candidate parameters to either accept or reject them to be posterior samples. When inferring the posterior given multiple IID observation, these likelihoods are just the joint likelihoods of each IID observation given the current parameter candidate. Thus, given a neural likelihood from SNLE, we can calculate these joint likelihoods and perform MCMC given IID data, we just have to multiply together (or add in log-space) the individual trial-likelihoods ( sbi takes care of that). # Train SNLE. inferer = SNLE ( prior , show_progress_bars = True , density_estimator = \"mdn\" ) theta , x = simulate_for_sbi ( simulator , prior , 10000 , simulation_batch_size = 1000 ) inferer . append_simulations ( theta , x ) . train ( training_batch_size = 1000 ); Running 10000 simulations.: 0%| | 0/10000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); The pairplot above already indicates that (S)NLE is well able to obtain accurate posterior samples also for increasing number of trials (note that we trained the single-round version of SNLE so that we did not have to re-train it for new \\(x_o\\) ). Quantitatively we can measure the accuracy of SNLE by calculating the c2st score between SNLE and the true posterior samples, where the best accuracy is perfect for 0.5 : cs = [ c2st ( torch . from_numpy ( s1 ), torch . from_numpy ( s2 )) for s1 , s2 in zip ( true_samples , nle_samples ) ] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . 
item () : .2f } \" ) c2st score for num_trials=1: 0.50 c2st score for num_trials=5: 0.51 c2st score for num_trials=15: 0.51 c2st score for num_trials=20: 0.51 IID inference with NPE using permutation-invariant embedding nets \u00b6 For NPE we need to define an embedding net that handles the set-like structure of iid-data, i.e., that it permutation invariant and can handle different number of trials. We implemented several embedding net classes that allow to construct such a permutation- and number-of-trials invariant embedding net. To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021). To become invariant w.r.t. the number-of-trials, we train the net with varying number of trials for each parameter setting. As it is difficult to handle tensors of varying lengths in the SBI training loop, we construct a training data set in which \u201cunobserved\u201d trials are mask by NaNs (and ignore the resulting SBI warning about NaNs in the training data). Construct training data set. \u00b6 # we need to fix the maximum number of trials. max_num_trials = 20 # construct training data set: we want to cover the full range of possible number of # trials num_training_samples = 5000 theta = prior . sample (( num_training_samples ,)) # there are certainly smarter ways to construct the training data set, but we go with a # for loop here for illustration purposes. x = torch . ones ( num_training_samples * max_num_trials , max_num_trials , x_dim ) * float ( \"nan\" ) for i in range ( num_training_samples ): xi = simulator ( theta [ i ] . repeat ( max_num_trials , 1 )) for j in range ( max_num_trials ): x [ i * max_num_trials + j , : j + 1 , :] = xi [: j + 1 , :] theta = theta . repeat_interleave ( max_num_trials , dim = 0 ) Build embedding net \u00b6 from sbi.neural_nets.embedding_nets import ( FCEmbedding , PermutationInvariantEmbedding , ) from sbi.utils import posterior_nn # embedding latent_dim = 10 single_trial_net = FCEmbedding ( input_dim = theta_dim , num_hiddens = 40 , num_layers = 2 , output_dim = latent_dim , ) embedding_net = PermutationInvariantEmbedding ( single_trial_net , trial_net_output_dim = latent_dim , # NOTE: post-embedding is not needed really. num_layers = 1 , num_hiddens = 10 , output_dim = 10 , ) # we choose a simple MDN as the density estimator. # NOTE: we turn off z-scoring of the data, as we used NaNs for the missing trials. density_estimator = posterior_nn ( \"mdn\" , embedding_net = embedding_net , z_score_x = \"none\" ) Run training \u00b6 inference = SNPE ( prior , density_estimator = density_estimator ) # NOTE: we don't exclude invalid x because we used NaNs for the missing trials. inference . append_simulations ( theta , x , exclude_invalid_x = False , ) . train ( training_batch_size = 1000 ) posterior = inference . build_posterior () WARNING:root:Found 95000 NaN simulations and 0 Inf simulations. They are not excluded from training due to `exclude_invalid_x=False`.Training will likely fail, we strongly recommend `exclude_invalid_x=True` for Single-round NPE. Neural network successfully converged after 168 epochs. Amortized inference \u00b6 Comparing runtimes, we see that the NPE training takes a bit longer than the training on single trials for NLE above. However, we trained the density estimator such that it can handle multiple and changing number of iid trials (up to 20). 
Thus, we can obtain posterior samples for different x_o with just a single forward pass instead of having to run MCMC for each new observation. As you can see below, the c2st score for increasing number of observed trials remains close to the ideal 0.5 . npe_samples = [] for xo in xos : # we need to pad the x_os with NaNs to match the shape of the training data. xoi = torch . ones ( 1 , max_num_trials , x_dim ) * float ( \"nan\" ) xoi [ 0 , : len ( xo ), :] = xo npe_samples . append ( posterior . sample ( sample_shape = ( num_samples ,), x = xoi )) cs = [ c2st ( torch . from_numpy ( s1 ), s2 ) for s1 , s2 in zip ( true_samples , npe_samples )] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter.","title":"The analytical posterior concentrates around true parameters with increasing number of IID trials"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#iid-inference-with-nle","text":"(S)NLE can easily perform inference given multiple IID x because it is based on learning the likelihood. Once the likelihood is learned on single trials, i.e., a neural network that given a single observation and a parameter predicts the likelihood of that observation given the parameter, one can perform MCMC to obtain posterior samples. MCMC relies on evaluating ratios of likelihoods of candidate parameters to either accept or reject them to be posterior samples. When inferring the posterior given multiple IID observation, these likelihoods are just the joint likelihoods of each IID observation given the current parameter candidate. Thus, given a neural likelihood from SNLE, we can calculate these joint likelihoods and perform MCMC given IID data, we just have to multiply together (or add in log-space) the individual trial-likelihoods ( sbi takes care of that). # Train SNLE. inferer = SNLE ( prior , show_progress_bars = True , density_estimator = \"mdn\" ) theta , x = simulate_for_sbi ( simulator , prior , 10000 , simulation_batch_size = 1000 ) inferer . append_simulations ( theta , x ) . train ( training_batch_size = 1000 ); Running 10000 simulations.: 0%| | 0/10000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); The pairplot above already indicates that (S)NLE is well able to obtain accurate posterior samples also for increasing number of trials (note that we trained the single-round version of SNLE so that we did not have to re-train it for new \\(x_o\\) ). Quantitatively we can measure the accuracy of SNLE by calculating the c2st score between SNLE and the true posterior samples, where the best accuracy is perfect for 0.5 : cs = [ c2st ( torch . from_numpy ( s1 ), torch . 
from_numpy ( s2 )) for s1 , s2 in zip ( true_samples , nle_samples ) ] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) c2st score for num_trials=1: 0.50 c2st score for num_trials=5: 0.51 c2st score for num_trials=15: 0.51 c2st score for num_trials=20: 0.51","title":"IID inference with NLE"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#iid-inference-with-npe-using-permutation-invariant-embedding-nets","text":"For NPE we need to define an embedding net that handles the set-like structure of iid-data, i.e., that it permutation invariant and can handle different number of trials. We implemented several embedding net classes that allow to construct such a permutation- and number-of-trials invariant embedding net. To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021). To become invariant w.r.t. the number-of-trials, we train the net with varying number of trials for each parameter setting. As it is difficult to handle tensors of varying lengths in the SBI training loop, we construct a training data set in which \u201cunobserved\u201d trials are mask by NaNs (and ignore the resulting SBI warning about NaNs in the training data).","title":"IID inference with NPE using permutation-invariant embedding nets"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#construct-training-data-set","text":"# we need to fix the maximum number of trials. max_num_trials = 20 # construct training data set: we want to cover the full range of possible number of # trials num_training_samples = 5000 theta = prior . sample (( num_training_samples ,)) # there are certainly smarter ways to construct the training data set, but we go with a # for loop here for illustration purposes. x = torch . ones ( num_training_samples * max_num_trials , max_num_trials , x_dim ) * float ( \"nan\" ) for i in range ( num_training_samples ): xi = simulator ( theta [ i ] . repeat ( max_num_trials , 1 )) for j in range ( max_num_trials ): x [ i * max_num_trials + j , : j + 1 , :] = xi [: j + 1 , :] theta = theta . repeat_interleave ( max_num_trials , dim = 0 )","title":"Construct training data set."},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#build-embedding-net","text":"from sbi.neural_nets.embedding_nets import ( FCEmbedding , PermutationInvariantEmbedding , ) from sbi.utils import posterior_nn # embedding latent_dim = 10 single_trial_net = FCEmbedding ( input_dim = theta_dim , num_hiddens = 40 , num_layers = 2 , output_dim = latent_dim , ) embedding_net = PermutationInvariantEmbedding ( single_trial_net , trial_net_output_dim = latent_dim , # NOTE: post-embedding is not needed really. num_layers = 1 , num_hiddens = 10 , output_dim = 10 , ) # we choose a simple MDN as the density estimator. # NOTE: we turn off z-scoring of the data, as we used NaNs for the missing trials. density_estimator = posterior_nn ( \"mdn\" , embedding_net = embedding_net , z_score_x = \"none\" )","title":"Build embedding net"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#run-training","text":"inference = SNPE ( prior , density_estimator = density_estimator ) # NOTE: we don't exclude invalid x because we used NaNs for the missing trials. inference . 
append_simulations ( theta , x , exclude_invalid_x = False , ) . train ( training_batch_size = 1000 ) posterior = inference . build_posterior () WARNING:root:Found 95000 NaN simulations and 0 Inf simulations. They are not excluded from training due to `exclude_invalid_x=False`.Training will likely fail, we strongly recommend `exclude_invalid_x=True` for Single-round NPE. Neural network successfully converged after 168 epochs.","title":"Run training"},{"location":"tutorial/14_iid_data_and_permutation_invarient_embeddings/#amortized-inference","text":"Comparing runtimes, we see that the NPE training takes a bit longer than the training on single trials for NLE above. However, we trained the density estimator such that it can handle multiple and changing number of iid trials (up to 20). Thus, we can obtain posterior samples for different x_o with just a single forward pass instead of having to run MCMC for each new observation. As you can see below, the c2st score for increasing number of observed trials remains close to the ideal 0.5 . npe_samples = [] for xo in xos : # we need to pad the x_os with NaNs to match the shape of the training data. xoi = torch . ones ( 1 , max_num_trials , x_dim ) * float ( \"nan\" ) xoi [ 0 , : len ( xo ), :] = xo npe_samples . append ( posterior . sample ( sample_shape = ( num_samples ,), x = xoi )) cs = [ c2st ( torch . from_numpy ( s1 ), s2 ) for s1 , s2 in zip ( true_samples , npe_samples )] for _ in range ( len ( num_trials )): print ( f \"c2st score for num_trials= { num_trials [ _ ] } : { cs [ _ ] . item () : .2f } \" ) Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 1 else f \" { nt } trial\" for nt in num_trials ] + [ r \"$\\theta_o$\" ], frameon = False , fontsize = 12 , ); Drawing 5000 posterior samples: 0%| | 0/5000 [00:00 Dimensions: (chain: 4, draw: 1254, theta_dim_0: 2) Coordinates: * chain (chain) int64 0 1 2 3 * draw (draw) int64 0 1 2 3 4 5 6 ... 1248 1249 1250 1251 1252 1253 * theta_dim_0 (theta_dim_0) int64 0 1 Data variables: theta (chain, draw, theta_dim_0) float32 2.125 0.8092 ... 0.8088 Attributes: created_at: 2022-08-10T14:02:41.300799 arviz_version: 0.11.2 Diagnostic plots \u00b6 az . style . use ( \"arviz-darkgrid\" ) az . plot_rank ( inference_data ) array([, ], dtype=object) az . plot_autocorr ( inference_data ); az . plot_trace ( inference_data , compact = False ); az . plot_ess ( inference_data , kind = \"evolution\" ); Posterior density plots \u00b6 az . plot_posterior ( inference_data ) array([, ], dtype=object) print ( f \"Given the { num_trials } we observed, the posterior is centered around true underlying parameters theta_o: { theta_o } \" ) Given the 100 we observed, the posterior is centered around true underlying parameters theta_o: tensor([[1.9622, 0.7550]]) az . plot_pair ( inference_data ) az . plot_pair ( inference_data , var_names = [ \"theta\" ], kind = \"hexbin\" , marginals = True , figsize = ( 10 , 10 ), ) array([[, None], [, ]], dtype=object)","title":"Density plots and MCMC diagnostics with ArviZ"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#mcmc-diagnostics-with-arviz","text":"This tutorial shows how to evaluate the quality of MCMC samples generated via sbi using the arviz package. 
Outline: 1) Train MNLE to approximate the likelihood underlying the simulator 2) Run MCMC using pyro MCMC samplers via sbi interface 3) Use arviz to visualize the posterior, predictive distributions and MCMC diagnostics. import arviz as az import torch from sbi.inference import MNLE , likelihood_estimator_based_potential from pyro.distributions import InverseGamma from torch.distributions import Beta , Binomial , Gamma from sbi.utils import MultipleIndependent from sbi.inference import MCMCPosterior # Seeding torch . manual_seed ( 1 ); # Toy simulator for mixed data def mixed_simulator ( theta ): beta , ps = theta [:, : 1 ], theta [:, 1 :] choices = Binomial ( probs = ps ) . sample () rts = InverseGamma ( concentration = 2 * torch . ones_like ( beta ), rate = beta ) . sample () return torch . cat (( rts , choices ), dim = 1 ) # Define independent priors for each dimension. prior = MultipleIndependent ( [ Gamma ( torch . tensor ([ 1.0 ]), torch . tensor ([ 0.5 ])), Beta ( torch . tensor ([ 2.0 ]), torch . tensor ([ 2.0 ])), ], validate_args = False , )","title":"MCMC diagnostics with Arviz"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#train-mnle-to-approximate-the-likelihood","text":"For this tutorial, we will use a simple simulator with two parameters. For details see the example on the decision making model . Here, we pass mcmc_method=\"nuts\" in order to use the underlying pyro No-U-turn sampler , but it would work as well with other samplers (e.g. \u201cslice_np_vectorized\u201d, \u201chmc\u201d). Additionally, when calling posterior.sample(...) we pass return_arviz=True so that the Arviz InferenceData object is returned. This object gives us access to the wealth of MCMC diagnostics tool provided by arviz . # Generate training data and train MNLE. num_simulations = 10000 theta = prior . sample (( num_simulations ,)) x = mixed_simulator ( theta ) trainer = MNLE ( prior ) likelihood_estimator = trainer . append_simulations ( theta , x ) . train () /Users/janbolts/qode/sbi/sbi/neural_nets/mnle.py:60: UserWarning: The mixed neural likelihood estimator assumes that x contains continuous data in the first n-1 columns (e.g., reaction times) and categorical data in the last column (e.g., corresponding choices). If this is not the case for the passed `x` do not use this function. warnings.warn( Neural network successfully converged after 65 epochs.","title":"Train MNLE to approximate the likelihood"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#run-pyro-nuts-mcmc-and-obtain-arviz-inferencedata-object","text":"# Simulate \"observed\" data x_o torch . manual_seed ( 42 ) num_trials = 100 theta_o = prior . sample (( 1 ,)) x_o = mixed_simulator ( theta_o . repeat ( num_trials , 1 )) # Set MCMC parameters and run Pyro NUTS. mcmc_parameters = dict ( num_chains = 4 , thin = 5 , warmup_steps = 50 , init_strategy = \"proposal\" , method = \"nuts\" , ) num_samples = 1000 # get the potential function and parameter transform for constructing the posterior potential_fn , parameter_transform = likelihood_estimator_based_potential ( likelihood_estimator , prior , x_o ) mnle_posterior = MCMCPosterior ( potential_fn , proposal = prior , theta_transform = parameter_transform , ** mcmc_parameters ) mnle_samples = mnle_posterior . sample ( ( num_samples ,), x = x_o , show_progress_bars = False ) # get arviz InferenceData object from posterior inference_data = mnle_posterior . 
get_arviz_inference_data () /Users/janbolts/qode/sbi/sbi/utils/sbiutils.py:280: UserWarning: An x with a batch size of 100 was passed. It will be interpreted as a batch of independent and identically distributed data X={x_1, ..., x_n}, i.e., data generated based on the same underlying (unknown) parameter. The resulting posterior will be with respect to entire batch, i.e,. p(theta | X). warnings.warn(","title":"Run Pyro NUTS MCMC and obtain arviz InferenceData object"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#generate-arviz-plots","text":"The resulting InferenceData object can be passed to most arviz plotting functions, and there are plenty see here for an overview. To get a better understanding of the InferenceData object see here . Below and overview of common MCMC diagnostics plot, see the corresponding arviz documentation for interpretation of the plots. We will a full use-case using the SBI-MCMC-arviz workflow soon. print ( inference_data . posterior ) Dimensions: (chain: 4, draw: 1254, theta_dim_0: 2) Coordinates: * chain (chain) int64 0 1 2 3 * draw (draw) int64 0 1 2 3 4 5 6 ... 1248 1249 1250 1251 1252 1253 * theta_dim_0 (theta_dim_0) int64 0 1 Data variables: theta (chain, draw, theta_dim_0) float32 2.125 0.8092 ... 0.8088 Attributes: created_at: 2022-08-10T14:02:41.300799 arviz_version: 0.11.2","title":"Generate arviz plots"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#diagnostic-plots","text":"az . style . use ( \"arviz-darkgrid\" ) az . plot_rank ( inference_data ) array([, ], dtype=object) az . plot_autocorr ( inference_data ); az . plot_trace ( inference_data , compact = False ); az . plot_ess ( inference_data , kind = \"evolution\" );","title":"Diagnostic plots"},{"location":"tutorial/15_mcmc_diagnostics_with_arviz/#posterior-density-plots","text":"az . plot_posterior ( inference_data ) array([, ], dtype=object) print ( f \"Given the { num_trials } we observed, the posterior is centered around true underlying parameters theta_o: { theta_o } \" ) Given the 100 we observed, the posterior is centered around true underlying parameters theta_o: tensor([[1.9622, 0.7550]]) az . plot_pair ( inference_data ) az . plot_pair ( inference_data , var_names = [ \"theta\" ], kind = \"hexbin\" , marginals = True , figsize = ( 10 , 10 ), ) array([[, None], [, ]], dtype=object)","title":"Posterior density plots"},{"location":"tutorial/16_implemented_methods/","text":"API of implemented methods \u00b6 This notebook spells out the API for all algorithms implemented in the sbi toolbox: Posterior estimation (SNPE) Likelihood estimation (SNLE) Likelihood-ratio estimation (SNRE) Utilities Posterior estimation (SNPE) \u00b6 Fast \u03b5-free Inference of Simulation Models with Bayesian Conditional Density Estimation by Papamakarios & Murray (NeurIPS 2016) [PDF] [BibTeX] from sbi.inference import SNPE_A inference = SNPE_A ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x , proposal = proposal ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Automatic posterior transformation for likelihood-free inference by Greenberg, Nonnenmacher & Macke (ICML 2019) [PDF] from sbi.inference import SNPE inference = SNPE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x , proposal = proposal ) . 
train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Truncated proposals for scalable and hassle-free simulation-based inference by Deistler, Goncalves & Macke (NeurIPS 2022) [Paper] from sbi.inference import SNPE from sbi.utils import get_density_thresholder , RestrictedPrior inference = SNPE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( force_first_round_loss = True ) posterior = inference . build_posterior () . set_default_x ( x_o ) accept_reject_fn = get_density_thresholder ( posterior , quantile = 1e-4 ) proposal = RestrictedPrior ( prior , accept_reject_fn , sample_with = \"rejection\" ) Likelihood estimation (SNLE) \u00b6 Sequential neural likelihood: Fast likelihood-free inference with autoregressive flows by Papamakarios, Sterratt & Murray (AISTATS 2019) [PDF] [BibTeX] from sbi.inference import SNLE inference = SNLE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Variational methods for simulation-based inference by Gl\u00f6ckler, Deistler, Macke (ICLR 2022) [Paper] from sbi.inference import SNLE inference = SNLE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior ( sample_with = \"vi\" , vi_method = \"fKL\" ) . set_default_x ( x_o ) proposal = posterior Flexible and efficient simulation-based inference for models of decision-making by Boelts, Lueckmann, Gao, Macke (Elife 2022) [Paper] from sbi.inference import MNLE inference = MNLE ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) Likelihood-ratio estimation (SNRE) \u00b6 Likelihood-free MCMC with Amortized Approximate Likelihood Ratios by Hermans, Begy & Louppe (ICML 2020) [PDF] from sbi.inference import SNRE_A inference = SNRE_A ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) On Contrastive Learning for Likelihood-free Inference Durkan, Murray & Papamakarios (ICML 2020) [PDF] . from sbi.inference import SNRE inference = SNRE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation by Delaunoy, Hermans, Rozet, Wehenkel & Louppe (NeurIPS 2022) [PDF] from sbi.inference import BNRE inference = BNRE ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( regularization_strength = 100. ) posterior = inference . build_posterior () . 
set_default_x ( x_o ) Contrastive Neural Ratio Estimation Benjamin Kurt Miller, Christoph Weniger, Patrick Forr\u00e9 (NeurIPS 2022) [PDF] # The main feature of NRE-C is producing an exact ratio of densities at optimum, even when using multiple contrastive pairs (classes). from sbi.inference import SNRE_C # Amortized inference inference = SNRE_C ( prior ) proposal = prior theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( num_classes = 5 , # SNRE_C sees `2 * num_classes - 1` marginally drawn contrastive pairs. gamma = 1.0 , # SNRE_C can control the weight between terms in its loss function. ) posterior = inference . build_posterior () . set_default_x ( x_o ) Utilities \u00b6 Simulation-based calibration by Talts, Betancourt, Simpson, Vehtari, Gelman (arxiv 2018) [Paper] ) from sbi.analysis import run_sbc , sbc_rank_plot thetas = prior . sample (( 1_000 ,)) xs = simulator ( thetas ) ranks , dap_samples = run_sbc ( thetas , xs , posterior , num_posterior_samples = 1_000 ) _ = sbc_rank_plot ( ranks = ranks , num_posterior_samples = num_posterior_samples , plot_type = \"hist\" , num_bins = None , ) Restriction estimator by Deistler, Macke & Goncalves (PNAS 2022) [Paper] from sbi.inference import SNPE from sbi.utils import RestrictionEstimator restriction_estimator = RestrictionEstimator ( prior = prior ) proposal = prior for _ in range ( num_rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) restriction_estimator . append_simulations ( theta , x ) classifier = restriction_estimator . train () proposal = restriction_estimator . restrict_prior () all_theta , all_x , _ = restriction_estimator . get_simulations () inference = SNPE ( prior ) density_estimator = inference . append_simulations ( all_theta , all_x ) . train () posterior = inference . build_posterior () Expected coverage (sample-based) as computed in Deistler, Goncalves, Macke (Neurips 2022) [Paper] and in Rozet, Louppe (2021) [Paper] from sbi.analysis import run_sbc , sbc_rank_plot thetas = prior . sample (( 1_000 ,)) xs = simulator ( thetas ) ranks , dap_samples = run_sbc ( thetas , xs , posterior , num_posterior_samples = 1_000 , reduce_fns = posterior . log_prob ) _ = sbc_rank_plot ( ranks = ranks , num_posterior_samples = num_posterior_samples , plot_type = \"hist\" , num_bins = None , )","title":"Implemented algorithms"},{"location":"tutorial/16_implemented_methods/#api-of-implemented-methods","text":"This notebook spells out the API for all algorithms implemented in the sbi toolbox: Posterior estimation (SNPE) Likelihood estimation (SNLE) Likelihood-ratio estimation (SNRE) Utilities","title":"API of implemented methods"},{"location":"tutorial/16_implemented_methods/#posterior-estimation-snpe","text":"Fast \u03b5-free Inference of Simulation Models with Bayesian Conditional Density Estimation by Papamakarios & Murray (NeurIPS 2016) [PDF] [BibTeX] from sbi.inference import SNPE_A inference = SNPE_A ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x , proposal = proposal ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Automatic posterior transformation for likelihood-free inference by Greenberg, Nonnenmacher & Macke (ICML 2019) [PDF] from sbi.inference import SNPE inference = SNPE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . 
sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x , proposal = proposal ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Truncated proposals for scalable and hassle-free simulation-based inference by Deistler, Goncalves & Macke (NeurIPS 2022) [Paper] from sbi.inference import SNPE from sbi.utils import get_density_thresholder , RestrictedPrior inference = SNPE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( force_first_round_loss = True ) posterior = inference . build_posterior () . set_default_x ( x_o ) accept_reject_fn = get_density_thresholder ( posterior , quantile = 1e-4 ) proposal = RestrictedPrior ( prior , accept_reject_fn , sample_with = \"rejection\" )","title":"Posterior estimation (SNPE)"},{"location":"tutorial/16_implemented_methods/#likelihood-estimation-snle","text":"Sequential neural likelihood: Fast likelihood-free inference with autoregressive flows by Papamakarios, Sterratt & Murray (AISTATS 2019) [PDF] [BibTeX] from sbi.inference import SNLE inference = SNLE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Variational methods for simulation-based inference by Gl\u00f6ckler, Deistler, Macke (ICLR 2022) [Paper] from sbi.inference import SNLE inference = SNLE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior ( sample_with = \"vi\" , vi_method = \"fKL\" ) . set_default_x ( x_o ) proposal = posterior Flexible and efficient simulation-based inference for models of decision-making by Boelts, Lueckmann, Gao, Macke (Elife 2022) [Paper] from sbi.inference import MNLE inference = MNLE ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o )","title":"Likelihood estimation (SNLE)"},{"location":"tutorial/16_implemented_methods/#likelihood-ratio-estimation-snre","text":"Likelihood-free MCMC with Amortized Approximate Likelihood Ratios by Hermans, Begy & Louppe (ICML 2020) [PDF] from sbi.inference import SNRE_A inference = SNRE_A ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) On Contrastive Learning for Likelihood-free Inference Durkan, Murray & Papamakarios (ICML 2020) [PDF] . from sbi.inference import SNRE inference = SNRE ( prior ) proposal = prior for _ in range ( rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train () posterior = inference . build_posterior () . set_default_x ( x_o ) proposal = posterior Towards Reliable Simulation-Based Inference with Balanced Neural Ratio Estimation by Delaunoy, Hermans, Rozet, Wehenkel & Louppe (NeurIPS 2022) [PDF] from sbi.inference import BNRE inference = BNRE ( prior ) theta = prior . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . 
append_simulations ( theta , x ) . train ( regularization_strength = 100. ) posterior = inference . build_posterior () . set_default_x ( x_o ) Contrastive Neural Ratio Estimation Benjamin Kurt Miller, Christoph Weniger, Patrick Forr\u00e9 (NeurIPS 2022) [PDF] # The main feature of NRE-C is producing an exact ratio of densities at optimum, even when using multiple contrastive pairs (classes). from sbi.inference import SNRE_C # Amortized inference inference = SNRE_C ( prior ) proposal = prior theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) _ = inference . append_simulations ( theta , x ) . train ( num_classes = 5 , # SNRE_C sees `2 * num_classes - 1` marginally drawn contrastive pairs. gamma = 1.0 , # SNRE_C can control the weight between terms in its loss function. ) posterior = inference . build_posterior () . set_default_x ( x_o )","title":"Likelihood-ratio estimation (SNRE)"},{"location":"tutorial/16_implemented_methods/#utilities","text":"Simulation-based calibration by Talts, Betancourt, Simpson, Vehtari, Gelman (arxiv 2018) [Paper] ) from sbi.analysis import run_sbc , sbc_rank_plot thetas = prior . sample (( 1_000 ,)) xs = simulator ( thetas ) ranks , dap_samples = run_sbc ( thetas , xs , posterior , num_posterior_samples = 1_000 ) _ = sbc_rank_plot ( ranks = ranks , num_posterior_samples = num_posterior_samples , plot_type = \"hist\" , num_bins = None , ) Restriction estimator by Deistler, Macke & Goncalves (PNAS 2022) [Paper] from sbi.inference import SNPE from sbi.utils import RestrictionEstimator restriction_estimator = RestrictionEstimator ( prior = prior ) proposal = prior for _ in range ( num_rounds ): theta = proposal . sample (( num_sims ,)) x = simulator ( theta ) restriction_estimator . append_simulations ( theta , x ) classifier = restriction_estimator . train () proposal = restriction_estimator . restrict_prior () all_theta , all_x , _ = restriction_estimator . get_simulations () inference = SNPE ( prior ) density_estimator = inference . append_simulations ( all_theta , all_x ) . train () posterior = inference . build_posterior () Expected coverage (sample-based) as computed in Deistler, Goncalves, Macke (Neurips 2022) [Paper] and in Rozet, Louppe (2021) [Paper] from sbi.analysis import run_sbc , sbc_rank_plot thetas = prior . sample (( 1_000 ,)) xs = simulator ( thetas ) ranks , dap_samples = run_sbc ( thetas , xs , posterior , num_posterior_samples = 1_000 , reduce_fns = posterior . log_prob ) _ = sbc_rank_plot ( ranks = ranks , num_posterior_samples = num_posterior_samples , plot_type = \"hist\" , num_bins = None , )","title":"Utilities"},{"location":"tutorial/17_SBI_for_models_of_decision_making/","text":"SBI with mixed data, iid data, and experimental conditions \u00b6 For a general tutorial on using SBI with trial-based iid data, see tutorial 14 . Here, we cover the use-case often occurring in models of decision-making: trial-based data with mixed data types and varying experimental conditions. Trial-based SBI with mixed data types \u00b6 In some cases, models with trial-based data additionally return data with mixed data types, e.g., continous and discrete data. For example, most computational models of decision-making have continuous reaction times and discrete choices as output. This can induce a problem when performing trial-based SBI that relies on learning a neural likelihood: It is challenging for most density estimators to handle both, continuous and discrete data at the same time. 
However, there is a recent SBI method for solving this problem, it\u2019s called Mixed Neural Likelihood Estimation (MNLE). It works just like NLE, but with mixed data types. The trick is that it learns two separate density estimators, one for the discrete part of the data, and one for the continuous part, and combines the two to obtain the final neural likelihood. Crucially, the continuous density estimator is trained conditioned on the output of the discrete one, such that statistical dependencies between the discrete and continuous data (e.g., between choices and reaction times) are modeled as well. The interested reader is referred to the original paper available here . MNLE was recently added to sbi (see this PR and also issue ) and follow the same API as SNLE . In this tutorial we will show how to apply MNLE to mixed data, and how to deal with varying experimental conditions. Toy problem for MNLE \u00b6 To illustrate MNLE we set up a toy simulator that outputs mixed data and for which we know the likelihood such we can obtain reference posterior samples via MCMC. Simulator : To simulate mixed data we do the following Sample reaction time from inverse Gamma Sample choices from Binomial Return reaction time \\(rt \\in (0, \\infty)\\) and choice index \\(c \\in \\{0, 1\\}\\) \\[ c \\sim \\text{Binomial}(\\rho) \\\\ rt \\sim \\text{InverseGamma}(\\alpha=2, \\beta) \\\\ \\] Prior : The priors of the two parameters \\(\\rho\\) and \\(\\beta\\) are independent. We define a Beta prior over the probabilty parameter of the Binomial used in the simulator and a Gamma prior over the shape-parameter of the inverse Gamma used in the simulator: \\[ p(\\beta, \\rho) = p(\\beta) \\; p(\\rho) ; \\\\ p(\\beta) = \\text{Gamma}(1, 0.5) \\\\ p(\\text{probs}) = \\text{Beta}(2, 2) \\] Because the InverseGamma and the Binomial likelihoods are well-defined we can perform MCMC on this problem and obtain reference-posterior samples. import matplotlib.pyplot as plt import torch from torch import Tensor from sbi.inference import MNLE from pyro.distributions import InverseGamma from torch.distributions import Beta , Binomial , Categorical , Gamma from sbi.utils import MultipleIndependent from sbi.utils.metrics import c2st from sbi.analysis import pairplot from sbi.inference import MCMCPosterior from sbi.utils.torchutils import atleast_2d from sbi.inference.potentials.likelihood_based_potential import ( MixedLikelihoodBasedPotential , ) from sbi.utils.conditional_density_utils import ConditionedPotential from sbi.utils import mcmc_transform from sbi.inference.potentials.base_potential import BasePotential # Toy simulator for mixed data def mixed_simulator ( theta : Tensor , concentration_scaling : float = 1.0 ): \"\"\"Returns a sample from a mixed distribution given parameters theta. Args: theta: batch of parameters, shape (batch_size, 2) concentration_scaling: scaling factor for the concentration parameter of the InverseGamma distribution, mimics an experimental condition. \"\"\" beta , ps = theta [:, : 1 ], theta [:, 1 :] choices = Binomial ( probs = ps ) . sample () rts = InverseGamma ( concentration = concentration_scaling * torch . ones_like ( beta ), rate = beta ) . sample () return torch . cat (( rts , choices ), dim = 1 ) # The potential function defines the ground truth likelihood and allows us to obtain reference posterior samples via MCMC. 
class PotentialFunctionProvider ( BasePotential ): allow_iid_x = True # type: ignore def __init__ ( self , prior , x_o , concentration_scaling = 1.0 , device = \"cpu\" ): super () . __init__ ( prior , x_o , device ) self . concentration_scaling = concentration_scaling def __call__ ( self , theta , track_gradients : bool = True ): theta = atleast_2d ( theta ) with torch . set_grad_enabled ( track_gradients ): iid_ll = self . iid_likelihood ( theta ) return iid_ll + self . prior . log_prob ( theta ) def iid_likelihood ( self , theta ): lp_choices = torch . stack ( [ Binomial ( probs = th . reshape ( 1 , - 1 )) . log_prob ( self . x_o [:, 1 :]) for th in theta [:, 1 :] ], dim = 1 , ) lp_rts = torch . stack ( [ InverseGamma ( concentration = self . concentration_scaling * torch . ones_like ( beta_i ), rate = beta_i , ) . log_prob ( self . x_o [:, : 1 ]) for beta_i in theta [:, : 1 ] ], dim = 1 , ) joint_likelihood = ( lp_choices + lp_rts ) . squeeze () assert joint_likelihood . shape == torch . Size ([ self . x_o . shape [ 0 ], theta . shape [ 0 ]]) return joint_likelihood . sum ( 0 ) # Define independent prior. prior = MultipleIndependent ( [ Gamma ( torch . tensor ([ 1.0 ]), torch . tensor ([ 0.5 ])), Beta ( torch . tensor ([ 2.0 ]), torch . tensor ([ 2.0 ])), ], validate_args = False , ) Obtain reference-posterior samples via analytical likelihood and MCMC \u00b6 torch . manual_seed ( 42 ) num_trials = 10 num_samples = 1000 theta_o = prior . sample (( 1 ,)) x_o = mixed_simulator ( theta_o . repeat ( num_trials , 1 )) mcmc_kwargs = dict ( num_chains = 20 , warmup_steps = 50 , method = \"slice_np_vectorized\" , init_strategy = \"proposal\" , ) true_posterior = MCMCPosterior ( potential_fn = PotentialFunctionProvider ( prior , x_o ), proposal = prior , theta_transform = mcmc_transform ( prior , enable_transform = True ), ** mcmc_kwargs , ) true_samples = true_posterior . sample (( num_samples ,)) /Users/janbolts/qode/sbi/sbi/utils/sbiutils.py:342: UserWarning: An x with a batch size of 10 was passed. It will be interpreted as a batch of independent and identically distributed data X={x_1, ..., x_n}, i.e., data generated based on the same underlying (unknown) parameter. The resulting posterior will be with respect to entire batch, i.e,. p(theta | X). 
warnings.warn( Running vectorized MCMC with 20 chains: 0%| | 0/20000 [00:00 https://sbi-dev.github.io/sbi/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/citation/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/code_of_conduct/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/contribute/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/credits/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/faq/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/install/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/reference/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/examples/00_HH_simulator/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/examples/01_decision_making_model/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/faq/question_01/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/faq/question_02/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/faq/question_03/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/faq/question_04/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/faq/question_05/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/faq/question_06/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/faq/question_07/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/00_getting_started/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/01_gaussian_amortized/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/02_flexible_interface/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/03_multiround_inference/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/04_density_estimators/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/05_embedding_net/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/07_conditional_distributions/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/08_restriction_estimator/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/09_sensitivity_analysis/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/10_crafting_summary_statistics/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/11_sampler_interface/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/12_diagnostics_posterior_predictive_check/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/13_diagnostics_simulation_based_calibration/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/14_iid_data_and_permutation_invariant_embeddings/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/14_iid_data_and_permutation_invarient_embeddings/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/15_mcmc_diagnostics_with_arviz/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/16_implemented_methods/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/17_SBI_for_models_of_decision_making/ - 2023-11-02 + 2023-11-03 daily https://sbi-dev.github.io/sbi/tutorial/17_vi_posteriors/ - 2023-11-02 + 2023-11-03 daily \ No newline at end of file diff --git a/sitemap.xml.gz b/sitemap.xml.gz index 67e4defa53a92e3131362ec4958e9b01a42d18cc..1659e99043d4037b84ce97d55d2e0f22fbbdb084 100644 GIT binary patch delta 30 mcmeBR?O@8;l$*yb{keGjAQ#v^+eIiBRrev>N6zyJW0-wH_p delta 30 
mcmeBR?O@8;l`e#&_w`yNKojYsw{a-?PiM5jtJFaQ9N4hb3n diff --git a/tutorial/00_getting_started/index.html b/tutorial/00_getting_started/index.html index ab6c7dd1c..55f05d55a 100644 --- a/tutorial/00_getting_started/index.html +++ b/tutorial/00_getting_started/index.html @@ -346,47 +346,6 @@ Next steps - - -
  • - - Requirements for the simulator, prior, and observation - - - - -
  • - -
  • - - Running different algorithms - -
  • @@ -403,20 +362,6 @@ -
  • - - Amortized inference - -
  • - - - - - - - - -
  • Flexible interface @@ -432,8 +377,8 @@
  • - - Sampler interface + + Amortized inference
  • @@ -507,8 +452,8 @@
  • - - Using Variational Inference for Building Posteriors + + Sampling algorithms in sbi
  • @@ -549,8 +494,8 @@
  • - - Handling invalid simulations + + SBI with trial-based data
  • @@ -563,8 +508,8 @@
  • - - Crafting summary statistics + + Handling invalid simulations
  • @@ -577,8 +522,8 @@
  • - - SBI with trial-based data + + Crafting summary statistics
  • @@ -941,47 +886,6 @@ Next steps - - -
  • - - Requirements for the simulator, prior, and observation - - - - -
  • - -
  • - - Running different algorithms - -
  • @@ -1017,12 +921,12 @@

    Running the inference procedure
    num_dim = 3
     prior = utils.BoxUniform(low=-2 * torch.ones(num_dim), high=2 * torch.ones(num_dim))
     
    -
     def simulator(parameter_set):
         return 1.0 + parameter_set + torch.randn(parameter_set.shape) * 0.1
     

    sbi can then run inference:

    -
    posterior = infer(simulator, prior, method="SNPE", num_simulations=1000)
    +
    # Other methods are "SNLE" or "SNRE".
    +posterior = infer(simulator, prior, method="SNPE", num_simulations=1000)
     
    Running 1000 simulations.:   0%|          | 0/1000 [00:00<?, ?it/s]
     
    @@ -1043,32 +947,7 @@ 

Running the inference procedure [figure: png]

    Next steps

    -

    The single-line interface described above provides an easy entry for using sbi. However, if you are working on a larger project or need additional features, we strongly recommend using the flexible interface.

    -

    Requirements for the simulator, prior, and observation

    -

    In the interface described above, you need to provide a prior and a simulator for training. Let’s talk about what requirements they need to satisfy.

    -

    Prior

    -

A prior is a distribution object from which parameter sets can be sampled. Any class is allowed as the prior as long as it provides prior.sample() and prior.log_prob().
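For example, the BoxUniform prior used earlier in this tutorial already satisfies this interface; a minimal check (a sketch assuming three parameters) could look like this:

import torch
from sbi import utils

# A box-uniform prior over three parameters; it exposes .sample() and
# .log_prob(), which is all sbi requires of a prior object.
prior = utils.BoxUniform(low=-2 * torch.ones(3), high=2 * torch.ones(3))

theta = prior.sample((5,))        # tensor of shape (5, 3)
log_prob = prior.log_prob(theta)  # tensor of shape (5,)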

    -

    Simulator

    -

    The simulator is a Python callable that takes in a parameter set and outputs data with some (even if very small) stochasticity.

    -

    Allowed data types and shapes for input and output:

    -
      -
    • the input parameter set and the output have to be either a np.ndarray or a torch.Tensor.
    • -
    • the input parameter set should have either shape (1,N) or (N), and the output must have shape (1,M) or (M).
    • -
    -

    You can call simulators not written in Python as long as you wrap them in a Python function.
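As an illustration of such a wrapper (a sketch only: the executable name my_external_simulator and its input/output convention are made up here), one could shell out to an external program and convert its output back to a tensor:

import subprocess
import torch

def wrapped_simulator(parameter_set: torch.Tensor) -> torch.Tensor:
    # Pass the parameters as command-line arguments to the (hypothetical)
    # external program, which prints whitespace-separated floats.
    args = [str(value) for value in parameter_set.flatten().tolist()]
    result = subprocess.run(
        ["./my_external_simulator", *args],
        capture_output=True, text=True, check=True,
    )
    x = [float(value) for value in result.stdout.split()]
    return torch.tensor(x, dtype=torch.float32).reshape(1, -1)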

    -

    Observation

    -

    Once you have a trained posterior, you will want to evaluate or sample the posterior \(p(\theta|x_o)\) at certain observed values \(x_o\):

    -
      -
    • The allowable data types are either Numpy np.ndarray or a torch torch.Tensor.
    • -
    • The shape must be either (1,M) or just (M).
    • -
    -
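Putting these requirements together (a sketch that assumes the posterior trained earlier in this tutorial and the 3-dimensional simulator output; the observation values are arbitrary):

import torch

# x_o must be a np.ndarray or torch.Tensor of shape (1, M) or (M).
x_o = torch.as_tensor([[0.8, 0.4, -0.2]])
samples = posterior.sample((1000,), x=x_o)            # 1000 posterior samples
log_probability = posterior.log_prob(samples, x=x_o)  # log-density at those samples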

    Running different algorithms

    -

    sbi implements three classes of algorithms that can be used to obtain the posterior distribution: SNPE, SNLE, and SNRE. You can try the different algorithms by simply swapping out the method:

    -
    posterior = infer(simulator, prior, method="SNPE", num_simulations=1000)
    -posterior = infer(simulator, prior, method="SNLE", num_simulations=1000)
    -posterior = infer(simulator, prior, method="SNRE", num_simulations=1000)
    -
    -

    You can then infer, sample, evaluate, and plot the posterior as described above.

    +

The single-line interface described above provides an easy entry point for using sbi. However, for almost any real-world problem that goes beyond a simple demonstration, we strongly recommend using the flexible interface.
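In the flexible interface, the single infer() call is replaced by explicit steps. Roughly (a sketch using the same prior and simulator as above, shown with SNPE; x_o denotes the observation defined earlier):

from sbi.inference import SNPE, simulate_for_sbi

inference = SNPE(prior)
theta, x = simulate_for_sbi(simulator, prior, num_simulations=1000)
density_estimator = inference.append_simulations(theta, x).train()
posterior = inference.build_posterior(density_estimator)
samples = posterior.sample((1000,), x=x_o)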

    @@ -1104,13 +983,13 @@

    Running different algorithms diff --git a/tutorial/09_sensitivity_analysis/index.html b/tutorial/09_sensitivity_analysis/index.html index b0e691433..b7a80d3cf 100644 --- a/tutorial/09_sensitivity_analysis/index.html +++ b/tutorial/09_sensitivity_analysis/index.html @@ -311,20 +311,6 @@ -
  • - - Amortized inference - -
  • - - - - - - - - -
  • Flexible interface @@ -340,8 +326,8 @@
  • - - Sampler interface + + Amortized inference
  • @@ -415,8 +401,8 @@
  • - - Using Variational Inference for Building Posteriors + + Sampling algorithms in sbi
  • @@ -457,8 +443,8 @@
  • - - Handling invalid simulations + + SBI with trial-based data
  • @@ -471,8 +457,8 @@
  • - - Crafting summary statistics + + Handling invalid simulations
  • @@ -485,8 +471,8 @@
  • - - SBI with trial-based data + + Crafting summary statistics
  • diff --git a/tutorial/10_crafting_summary_statistics/index.html b/tutorial/10_crafting_summary_statistics/index.html index d842e288a..f69c3a25c 100644 --- a/tutorial/10_crafting_summary_statistics/index.html +++ b/tutorial/10_crafting_summary_statistics/index.html @@ -311,20 +311,6 @@ -
  • - - Amortized inference - -
  • - - - - - - - - -
  • Flexible interface @@ -340,8 +326,8 @@
  • - - Sampler interface + + Amortized inference
  • @@ -417,8 +403,8 @@
  • - - Using Variational Inference for Building Posteriors + + Sampling algorithms in sbi
  • @@ -458,6 +444,20 @@ +
  • + + SBI with trial-based data + +
  • + + + + + + + + +
  • Handling invalid simulations @@ -491,20 +491,6 @@ - - - - - -
  • - - SBI with trial-based data - -
  • - - - - @@ -1167,13 +1153,13 @@

    1.7 Explicit recommendations @@ -933,7 +844,7 @@ -

    The sampler interface

    +

    Sampling algorithms in sbi

    Note: this tutorial requires that the user is already familiar with the flexible interface.

sbi implements three methods: SNPE, SNLE, and SNRE. When using SNPE, the trained neural network directly approximates the posterior. Thus, sampling from the posterior can be done by sampling from the trained neural network. The neural networks trained in SNLE and SNRE approximate the likelihood(-ratio). Thus, in order to draw samples from the posterior, one has to perform additional sampling steps, e.g., Markov chain Monte Carlo (MCMC). In sbi, the implemented samplers are:
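Whichever sampler from that list is chosen, the explicit pattern looks roughly like the following (a sketch based on the potential-function API that also appears later in this document; likelihood_estimator is assumed to be a trained SNLE network and x_o the observation):

from sbi.inference import MCMCPosterior, likelihood_estimator_based_potential

potential_fn, parameter_transform = likelihood_estimator_based_potential(
    likelihood_estimator, prior, x_o
)
posterior = MCMCPosterior(
    potential_fn,
    proposal=prior,
    theta_transform=parameter_transform,
    method="slice_np_vectorized",
)
samples = posterior.sample((1000,))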

    -
    diff --git a/tutorial/13_diagnostics_simulation_based_calibration/index.html b/tutorial/13_diagnostics_simulation_based_calibration/index.html index ade247774..8bba7e91b 100644 --- a/tutorial/13_diagnostics_simulation_based_calibration/index.html +++ b/tutorial/13_diagnostics_simulation_based_calibration/index.html @@ -311,20 +311,6 @@ -
  • - - Amortized inference - -
  • - - - - - - - - -
  • Flexible interface @@ -340,8 +326,8 @@
  • - - Sampler interface + + Amortized inference
  • @@ -415,8 +401,8 @@
  • - - Using Variational Inference for Building Posteriors + + Sampling algorithms in sbi
  • @@ -457,8 +443,8 @@
  • - - Handling invalid simulations + + SBI with trial-based data
  • @@ -471,8 +457,8 @@
  • - - Crafting summary statistics + + Handling invalid simulations
  • @@ -485,8 +471,8 @@
  • - - SBI with trial-based data + + Crafting summary statistics
  • diff --git a/tutorial/14_iid_data_and_permutation_invariant_embeddings/index.html b/tutorial/14_iid_data_and_permutation_invariant_embeddings/index.html index 538b9e998..3a319fd50 100644 --- a/tutorial/14_iid_data_and_permutation_invariant_embeddings/index.html +++ b/tutorial/14_iid_data_and_permutation_invariant_embeddings/index.html @@ -311,20 +311,6 @@ -
  • - - Amortized inference - -
  • - - - - - - - - -
  • Flexible interface @@ -340,8 +326,8 @@
  • - - Sampler interface + + Amortized inference
  • @@ -417,8 +403,8 @@
  • - - Using Variational Inference for Building Posteriors + + Sampling algorithms in sbi
  • @@ -457,34 +443,6 @@ - -
  • - - Handling invalid simulations - -
  • - - - - - - - - - -
  • - - Crafting summary statistics - -
  • - - - - - - - - @@ -612,6 +570,34 @@ + + + + + +
  • + + Handling invalid simulations + +
  • + + + + + + + + + +
  • + + Crafting summary statistics + +
  • + + + + @@ -1057,30 +1043,29 @@

    SBI with iid data and permutation-invariant embeddings

    There are scenarios in which we observe multiple data points per experiment and we can assume that they are independent and identically distributed (iid, i.e., they are assumed to have the same underlying model parameters). -For example, in a decision-making experiments, the experiment is often repeated in trials with the same experimental settings and conditions. The corresponding set of trials is then assumed to be “iid”. +For example, in decision-making experiments, the experiment is often repeated in trials with the same experimental settings and conditions. The corresponding set of trials is then assumed to be “iid” given a single parameter set. In such a scenario, we may want to obtain the posterior given a set of observation \(p(\theta | X=\{x_i\}_i^N)\).

    Amortization of neural network training: iid-inference with NLE / NRE

    -

    For some SBI variants the iid assumption can be exploited: when using a likelihood-based SBI method (SNLE, SNRE) one can train the density or ratio estimator on single-trial data, and then perform inference with MCMC. Crucially, because the data is iid and the estimator is trained on single-trial data, one can repeat the inference with a different x_o (a different set of trials, or different number of trials) without having to retrain the density estimator. One can interpet this as amortization of the SBI training: we can obtain a neural likelihood, or likelihood-ratio estimate for new x_os without retraining, but we still have to run MCMC or VI to do inference.

    -

    In addition, one can not only change the number of trials of a new x_o, but also the entire inference setting. -For example, one can apply hierarchical inference scenarios with changing hierarchical denpendencies between the model parameters–all without having to retrain the density estimator because that is based on estimating single-trail likelihoods.

    +

    For some SBI variants the iid assumption can be exploited: when using a likelihood-based SBI method (SNLE, SNRE) one can train the density or ratio estimator on single-trial data, and then perform inference with MCMC or variational inference (VI). Crucially, because the data is iid and the estimator is trained on single-trial data, one can repeat the inference with a different x_o (a different set of trials, or different number of trials) without having to retrain the density estimator. One can interpet this as amortization of the SBI training: we can obtain a neural likelihood, or likelihood-ratio estimate for new x_os without retraining, but we still have to run MCMC or VI to do inference.

    +

In addition, one can not only change the number of trials of a new x_o, but also the entire inference setting. -For example, one can apply hierarchical inference with changing hierarchical dependencies between the model parameters–all without having to retrain the density estimator because it estimates single-trial likelihoods.
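As a small illustration of this amortization of training (a sketch reusing the simulator, prior, theta_o, and the SNLE trainer inferer that appear in the code further below; the trial counts are arbitrary): once the single-trial likelihood estimator is trained, the same posterior object can be conditioned on observations with different numbers of trials without retraining.

posterior = inferer.build_posterior()

x_o_5_trials = simulator(theta_o.repeat(5, 1))    # 5 iid trials
x_o_20_trials = simulator(theta_o.repeat(20, 1))  # 20 iid trials

samples_5 = posterior.sample((1000,), x=x_o_5_trials)
samples_20 = posterior.sample((1000,), x=x_o_20_trials)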

    Full amortization: iid-inference with NPE and permutation-invariant embedding nets

    -

    When performing neural posterior estimation (SNPE) we cannot exploit the iid assumption directly because we are learning a density estimator in theta. +

    When performing neural posterior estimation (SNPE) we cannot exploit the iid assumption directly. Thus, the underlying neural network takes x as input and predicts the parameters of the density estimator. -As a consequence, if x is a set of iid observations \(X=\{x_i\}_i^N\) then the neural network has to be invariant to permutations of this set, i.e., it has to be permutation invariant. -Overall, this means that we can use SNPE for inference with iid data, however, we need to provide a corresponding embedding network that handles the iid-data and is permutation invariant. -This will likely require some hyperparameter tuning and more training data for the inference to work accurately. But once we have this, the inference is fully amortized, i.e., we can get new posterior samples basically instantly without retraining and without running MCMC or VI.

    -

    Let us first have a look how trial-based inference works in SBI before we discuss models with “mixed data types”.

    +As a consequence, if x is a set of iid observations \(X=\{x_i\}_i^N\) then the neural network has to be invariant to permutations of this set, i.e., it has to be permutation invariant. In addition, the neural network has to be able to consume a varying number of iid datapoints in order to be amortized over the number of trials. +Therefore, in order to use SNPE for inference on iid data, we need to provide a corresponding embedding network that handles the iid-data. +This will likely require some hyperparameter tuning and more training data for inference to work accurately. But once we have this, inference is fully amortized, i.e., we can get new posterior samples almost instantly without retraining and without running MCMC or VI.

    SBI with trial-based data

    -

    For illustration we use a simple linear Gaussian simulator, as in previous tutorials. The simulator takes a single parameter (vector), the mean of the Gaussian, and its variance is set to one. -We define a Gaussian prior over the mean and perform inference. -The observed data is again a from a Gaussian with some fixed “ground-truth” parameter \(\theta_o\). -Crucially, the observed data x_o can consist of multiple samples given the same ground-truth parameters and these samples are then iid:

    +

    For illustration, we use a simple linear Gaussian simulator, as in previous tutorials. The simulator takes a single parameter (vector) which is the mean of a Gaussian. The simulator then adds noise with a fixed variance (set to one). +We define a Gaussian prior over the mean and perform inference.

    +

    The observed data is also sampled from a Gaussian with some fixed “ground-truth” parameter \(\theta_o\). +Crucially, the observed data x_o can consist of multiple samples given the same ground-truth parameters and these samples are iid given \(\theta\):

    \[ \theta \sim \mathcal{N}(\mu_0,\; \Sigma_0) \\ x | \theta \sim \mathcal{N}(\theta,\; \Sigma=I) \\ \mathbf{x_o} = \{x_o^i\}_{i=1}^N \sim \mathcal{N}(\theta_o,\; \Sigma=I) \]
    -

    For this toy problem the ground-truth posterior is well defined, it is again a Gaussian, centered on the mean of \(\mathbf{x_o}\) and with variance scaled by the number of trials \(N\), i.e., the more trials we observe, the more information about the underlying \(\theta_o\) we have and the more concentrated the posteriors becomes.

    +

    For this toy problem, the ground-truth posterior is well defined, it is again a Gaussian, centered on the mean of \(\mathbf{x_o}\) and with variance scaled by the number of trials \(N\), i.e., the more trials we observe, the more information about the underlying \(\theta_o\) we have and the more concentrated the posteriors becomes.

    We will illustrate this below:

    import torch
     import matplotlib.pyplot as plt
    @@ -1156,8 +1141,7 @@ 

    Indeed, with increasing number of trials the posterior density concentrates around the true underlying parameter.

    IID inference with NLE

    -

    (S)NLE can easily perform inference given multiple IID x because it is based on learning the likelihood. Once the likelihood is learned on single trials, i.e., a neural network that given a single observation and a parameter predicts the likelihood of that observation given the parameter, one can perform MCMC to obtain posterior samples.

    -

    MCMC relies on evaluating ratios of likelihoods of candidate parameters to either accept or reject them to be posterior samples. When inferring the posterior given multiple IID observation, these likelihoods are just the joint likelihoods of each IID observation given the current parameter candidate. Thus, given a neural likelihood from SNLE, we can calculate these joint likelihoods and perform MCMC given IID data, we just have to multiply together (or add in log-space) the individual trial-likelihoods (sbi takes care of that).

    +

(S)NLE and (S)NRE can perform inference given multiple IID observations by using only single-trial training data (i.e., for training, we run the simulator only once per parameter set). Once the likelihood is learned on single trials (i.e., a neural network that predicts the likelihood of a single observation given a parameter set), one can sample the posterior for any number of trials. This works because, given a single-trial neural likelihood from (S)NLE or (S)NRE, we can calculate the joint likelihood of all trials by multiplying the single-trial likelihoods together (or adding them in log-space). The joint likelihood can then be plugged into MCMC or VI. sbi takes care of all of these steps, so you do not have to implement anything yourself:

    # Train SNLE.
     inferer = SNLE(prior, show_progress_bars=True, density_estimator="mdn")
     theta, x = simulate_for_sbi(simulator, prior, 10000, simulation_batch_size=1000)
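 # Possible continuation (a sketch with assumed variable names, not the tutorial's original code):
 # the single-trial likelihood learned here can be reused for any number of iid trials.
 likelihood_estimator = inferer.append_simulations(theta, x).train()
 posterior = inferer.build_posterior()  # MCMC-based posterior by default for (S)NLE
 # An observation made of 20 iid trials from one ground-truth parameter (assumes a batched simulator).
 theta_o = prior.sample((1,))
 x_o_20_trials = simulator(theta_o.repeat(20, 1))
 # sbi multiplies the 20 single-trial likelihoods internally when running MCMC.
 samples = posterior.sample((1000,), x=x_o_20_trials)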
    @@ -1265,10 +1249,11 @@ 

    IID inference with NLE

    IID inference with NPE using permutation-invariant embedding nets

    -

For NPE, we need to define an embedding net that handles the set-like structure of iid data, i.e., one that is permutation invariant and can handle a varying number of trials.

    +

For NPE, we need to define an embedding net that handles the set-like structure of iid data, i.e., one that is permutation invariant and can handle a varying number of trials.

We implemented several embedding net classes that allow you to construct such a permutation- and number-of-trials-invariant embedding net.

    To become permutation invariant, the neural net first learns embeddings for single trials and then performs a permutation invariant operation on those embeddings, e.g., by taking the sum or the mean (Chen et al. 2018, Radev et al. 2021).
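To make the idea concrete, here is a minimal, illustrative sketch in plain PyTorch (the class name IidEmbeddingNet is made up for this example; it is not one of sbi's own embedding-net classes, which additionally handle the NaN masking described below):

import torch
import torch.nn as nn

class IidEmbeddingNet(nn.Module):
    """Embeds each trial separately, then pools over trials with a permutation-invariant mean."""

    def __init__(self, trial_dim=1, embed_dim=10):
        super().__init__()
        # Shared network applied to every single trial.
        self.single_trial_net = nn.Sequential(
            nn.Linear(trial_dim, 20), nn.ReLU(), nn.Linear(20, embed_dim)
        )

    def forward(self, x):
        # x has shape (batch, num_trials, trial_dim).
        single_trial_embeddings = self.single_trial_net(x)
        # Averaging over the trial dimension makes the output independent of trial order.
        return single_trial_embeddings.mean(dim=1)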

    -

To become invariant w.r.t. the number of trials, we train the net with a varying number of trials for each parameter setting. As it is difficult to handle tensors of varying lengths in the SBI training loop, we construct a training data set in which “unobserved” trials are masked by NaNs (and ignore the resulting SBI warning about NaNs in the training data).

    +

To become invariant w.r.t. the number of trials, we train the net with a varying number of trials for each parameter setting. This means that, unlike for (S)NLE and (S)NRE, (S)NPE requires running the simulator multiple times per parameter set to generate the training data.

    +

In order to implement this in sbi, “unobserved” trials in the training dataset have to be masked by NaNs (the resulting SBI warning about NaNs in the training data can be ignored).
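Conceptually, the padding step could look like the following sketch (variable names are illustrative and it reuses the prior and simulator defined earlier; the tutorial's own construction of the training set follows below):

import torch

max_num_trials = 20
num_training_samples = 5000
theta = prior.sample((num_training_samples,))

# Simulate the maximum number of trials for every parameter set ...
x = torch.stack([simulator(theta) for _ in range(max_num_trials)], dim=1)

# ... then mask a random number of trailing "unobserved" trials with NaNs.
num_observed = torch.randint(1, max_num_trials + 1, (num_training_samples,))
for i, n in enumerate(num_observed):
    x[i, int(n):, :] = float("nan")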

    Construct training data set.

    # we need to fix the maximum number of trials.
     max_num_trials = 20
    @@ -1486,7 +1471,7 @@ 

    Amortized inference -

    Previous - Crafting summary statistics + Learning summary statistics

    - diff --git a/tutorial/17_SBI_for_models_of_decision_making/index.html b/tutorial/17_SBI_for_models_of_decision_making/index.html index 19b0125ed..fafdb05ac 100644 --- a/tutorial/17_SBI_for_models_of_decision_making/index.html +++ b/tutorial/17_SBI_for_models_of_decision_making/index.html @@ -309,20 +309,6 @@ -
[navigation sidebar diff: the left-hand tutorial menu is reordered and renamed, e.g. the "Sampler interface" entry becomes "Amortized inference", "Using Variational Inference for Building Posteriors" becomes "Sampling algorithms in sbi", and "SBI with trial-based data", "Handling invalid simulations", and "Crafting summary statistics" are reordered]
diff --git a/tutorial/17_vi_posteriors/index.html b/tutorial/17_vi_posteriors/index.html index 640b5ddfd..525bd9084 100644 --- a/tutorial/17_vi_posteriors/index.html +++ b/tutorial/17_vi_posteriors/index.html
[navigation sidebar diff: the same reordering and renaming of tutorial entries]
@@ -920,7 +853,7 @@

    Using Variational Inference for Building Posteriors

    -

    In the previous tutorial, we saw how to build the posterior and how to specialize on one specific observation x_o. If one uses SNPE, then the posterior can be sampled from directly, yet this comes at the expense of necessary correction terms during training, since the samples are obtained from the “wrong” prior for num_rounds > 1. For SNLE or SNRE, MCMC sampling is required, which is computationally expensive. With SNVI (sequential neural variational inference), it is possible to directly sample from the posterior without any corrections during training or without expensive MCMC for sampling. This is possible by learning the posterior with variational inference techniques. For this, an additional network (one for the likelihood or likelihood-to-evidence-ratio) must be trained first.

    +

If one uses SNPE, then the posterior can be sampled from directly (without MCMC). In contrast, for SNLE or SNRE, MCMC sampling is required, which is computationally expensive. With SNVI (sequential neural variational inference), it is possible to sample from the posterior directly, without corrections during training and without expensive MCMC at sampling time. This is achieved by learning the posterior with variational inference techniques. To do so, an additional network (one for the likelihood or the likelihood-to-evidence ratio) must be trained first.
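Roughly, and assuming an sbi version in which build_posterior accepts sample_with="vi", as well as a prior, training data (theta, x), and an observation x_o as in the previous tutorials, the workflow looks like the following sketch (the exact, authoritative calls are given under Main syntax below):

# Sketch only -- see "Main syntax" below for the exact calls.
inference = SNLE(prior)
likelihood_estimator = inference.append_simulations(theta, x).train()

# Build a variational posterior instead of an MCMC-based one.
posterior = inference.build_posterior(sample_with="vi", vi_method="rKL")
posterior = posterior.set_default_x(x_o)
posterior.train()                    # fit q(theta | x_o) by variational inference
samples = posterior.sample((1000,))  # fast sampling, no MCMC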

    Main syntax