Merge pull request #345 from johannbrehmer/develop

Last changes for v0.4.3
madminer-tool · Jun 6, 2019 · 2a9fef9
2 parents a9985b3 + 186df54
commit 2a9fef9
Show file tree

Hide file tree

Showing 7 changed files with 95 additions and 88 deletions.
diff --git a/.gitignore b/.gitignore
@@ -171,3 +171,4 @@ patches/
 # Shell scripts
 black.sh
 run_sphinx.sh
+publish.sh
diff --git a/docs/conf.py b/docs/conf.py
@@ -24,7 +24,7 @@
 author = 'Johann Brehmer, Felix Kling, Irina Espejo, and Kyle Cranmer'
 
 # The short X.Y version
-version = '0.4.2'
+version = '0.4.3'
 # The full version, including alpha/beta/rc tags
 release = version
 

diff --git a/examples/tutorial_particle_physics/4a_limits.ipynb b/examples/tutorial_particle_physics/4a_limits.ipynb
diff --git a/madminer/__version__.py b/madminer/__version__.py
@@ -1 +1 @@
-__version__ = "0.4.2"
+__version__ = "0.4.3"
diff --git a/madminer/limits.py b/madminer/limits.py
@@ -7,11 +7,10 @@
 
 from madminer.analysis import DataAnalyzer
 from madminer.utils.various import mdot
-from madminer.ml import ParameterizedRatioEstimator, Ensemble
+from madminer.ml import ParameterizedRatioEstimator, Ensemble, ScoreEstimator, load_estimator
 from madminer.utils.histo import Histo
 from madminer.sampling import SampleAugmenter
 from madminer import sampling
-from madminer.ml import ScoreEstimator
 
 logger = logging.getLogger(__name__)
 
@@ -154,7 +153,7 @@ def _analyse(
         elif mode == "ml":
             assert model_file is not None
             logger.info("Loading kinematic likelihood ratio estimator")
-            model = self._load_ratio_model(model_file)
+            model = load_estimator(model_file)
 
             logger.info("Calculating kinematic log likelihood ratio with estimator")
             log_r_kin = self._calculate_log_likelihood_ratio_kinematics(x, theta_grid, model)
@@ -170,7 +169,7 @@ def _analyse(
                 summary_function = self._make_summary_statistic_function("observables", observables=hist_vars)
             elif model_file is not None:
                 logger.info("Loading score estimator and setting it up as summary statistics")
-                model = self._load_score_model(model_file)
+                model = load_estimator(model_file)
                 summary_function = self._make_summary_statistic_function("sally", model=model)
             else:
                 raise RuntimeError("For 'histo' mode, either provide histo_vars or model_file!")
@@ -244,26 +243,6 @@ def summary_function(x):
 
         return summary_function
 
-    @staticmethod
-    def _load_ratio_model(filename):
-        if os.path.isdir(filename):
-            model = Ensemble()
-            model.load(filename)
-        else:
-            model = ParameterizedRatioEstimator()
-            model.load(filename)
-        return model
-
-    @staticmethod
-    def _load_score_model(filename):
-        if os.path.isdir(filename):
-            model = Ensemble()
-            model.load(filename)
-        else:
-            model = ScoreEstimator()
-            model.load(filename)
-        return model
-
     def _calculate_xsecs(self, thetas, test_split=0.2):
         # Test split
         start_event, end_event, correction_factor = self._train_test_split(False, test_split)
@@ -310,10 +289,7 @@ def _make_histo(
         histo = Histo(theta_bins, x_bins)
         logger.debug("Generating histo data")
         theta, summary_stats = self._make_histo_data(
-            summary_function,
-            theta_grid,
-            n_toys_per_theta,
-            histo_theta_batchsize=histo_theta_batchsize,
+            summary_function, theta_grid, n_toys_per_theta, histo_theta_batchsize=histo_theta_batchsize
         )
         logger.debug(
             "Histo data has theta dimensions %s and summary stats dimensions %s", theta.shape, summary_stats.shape
@@ -331,17 +307,24 @@ def _make_histo_data(self, summary_function, thetas, n_toys_per_theta, test_spli
         for i_batch in range(n_batches):
             logger.debug("Generating histogram data for batch %s / %s", i_batch + 1, n_batches)
             theta_batch = thetas[i_batch * histo_theta_batchsize : (i_batch + 1) * histo_theta_batchsize]
-            logger.debug("Theta data: indices %s to %s, shape %s", i_batch * histo_theta_batchsize, (i_batch + 1) * histo_theta_batchsize, theta_batch.shape)
+            logger.debug(
+                "Theta data: indices %s to %s, shape %s",
+                i_batch * histo_theta_batchsize,
+                (i_batch + 1) * histo_theta_batchsize,
+                theta_batch.shape,
+            )
             x, theta, _ = sampler.sample_train_plain(
                 theta=sampling.morphing_points(theta_batch),
-                n_samples=n_toys_per_theta*len(theta_batch),
+                n_samples=n_toys_per_theta * len(theta_batch),
                 test_split=test_split,
                 filename=None,
                 folder=None,
                 suppress_logging=True,
             )
             summary_stats = summary_function(x)
-            logger.debug("Output: x has shape %s, summary_stats %s, theta %s", x.shape, summary_stats.shape, theta.shape)
+            logger.debug(
+                "Output: x has shape %s, summary_stats %s, theta %s", x.shape, summary_stats.shape, theta.shape
+            )
             if all_theta is None or all_summary_stats is None:
                 all_theta = theta
                 all_summary_stats = summary_stats

diff --git a/madminer/ml.py b/madminer/ml.py
@@ -2572,5 +2572,34 @@ def _get_estimator_class(estimator_type):
             raise RuntimeError("Unknown estimator type {}!".format(estimator_type))
 
 
+def load_estimator(filename):
+    if os.path.isdir(filename):
+        model = Ensemble()
+        model.load(filename)
+
+    else:
+        with open(filename + "_settings.json", "r") as f:
+            settings = json.load(f)
+        try:
+            estimator_type = settings["estimator_type"]
+        except KeyError:
+            raise RuntimeError("Undefined estimator type")
+
+        if estimator_type == "parameterized_ratio":
+            model = ParameterizedRatioEstimator()
+        elif estimator_type == "double_parameterized_ratio":
+            model = DoubleParameterizedRatioEstimator()
+        elif estimator_type == "score":
+            model = ScoreEstimator()
+        elif estimator_type == "likelihood":
+            model = LikelihoodEstimator()
+        else:
+            raise RuntimeError("Unknown estimator type {}!".format(estimator_type))
+
+        model.load(filename)
+
+    return model
+
+
 class TheresAGoodReasonThisDoesntWork(Exception):
     pass
diff --git a/setup.py b/setup.py
@@ -20,7 +20,7 @@
 EMAIL = 'johann.brehmer@nyu.edu'
 AUTHOR = 'Johann Brehmer, Felix Kling, Irina Espejo, Kyle Cranmer'
 REQUIRES_PYTHON = '>=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, <4'
-VERSION = '0.4.2'
+VERSION = '0.4.3'
 
 # What packages are required for this module to be executed?
 REQUIRED = [