Changes needed for asymptotic sensitivity studies.
robertsjames committed Feb 26, 2024
1 parent 0e42675 commit 59ab42e
Showing 4 changed files with 83 additions and 32 deletions.
12 changes: 6 additions & 6 deletions flamedisx/inference.py
@@ -15,15 +15,15 @@
 __all__ = ['LOWER_RATE_MULTIPLIER_BOUND',
            'SUPPORTED_OPTIMIZERS',
            'SUPPORTED_INTERVAL_OPTIMIZERS',
-           'FLOAT32_EPS']
+           'FLOAT64_EPS']

 # Setting this to 0 does work, but makes the inference rather slow
 # (at least for scipy); probably there is a relative xtol computation,
 # which fails when x -> 0.
 LOWER_RATE_MULTIPLIER_BOUND = 1e-9

 # Floating point precision of 32-bit computations
-FLOAT32_EPS = np.finfo(np.float32).eps
+FLOAT64_EPS = np.finfo(np.float64).eps


 class OptimizerWarning(UserWarning):
@@ -429,12 +429,12 @@ def _minimize(self):
         # The underlying tensorflow computation has float32 precision,
         # so we have to adjust the precision options
         if kwargs['method'].upper() == 'TNC':
-            kwargs['options'].setdefault('accuracy', FLOAT32_EPS**0.5)
+            kwargs['options'].setdefault('accuracy', FLOAT64_EPS**0.5)

         # Achtung! setdefault will not kick in if user specified 'xtol' in the
         # options.
-        kwargs['options'].setdefault('xtol', FLOAT32_EPS**0.5)
-        kwargs['options'].setdefault('gtol', 1e-2 * FLOAT32_EPS**0.25)
+        kwargs['options'].setdefault('xtol', FLOAT64_EPS**0.5)
+        kwargs['options'].setdefault('gtol', 1e-2 * FLOAT64_EPS**0.25)

         if self.use_hessian:
             if (kwargs['method'].lower() in ('newton-cg', 'dogleg')
@@ -541,7 +541,7 @@ def _minimize(self):
             precision = kwargs['precision']
             del kwargs['precision']
         else:
-            precision = FLOAT32_EPS
+            precision = FLOAT64_EPS

         if self.minuit2:
             # Minuit2 changed the API; Minuit() no longer takes 'option-like'
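
For orientation, the new optimizer defaults above follow the usual rule of scaling tolerances with the machine epsilon of the working float type. A minimal sketch (not part of the commit, assuming only NumPy) comparing the old float32-based and new float64-based values:

import numpy as np

# Machine epsilon of single and double precision.
FLOAT32_EPS = np.finfo(np.float32).eps   # ~1.2e-07
FLOAT64_EPS = np.finfo(np.float64).eps   # ~2.2e-16

# Defaults as set in inference.py: sqrt(eps) for 'accuracy'/'xtol',
# 1e-2 * eps**0.25 for 'gtol'; the float64 values are much tighter.
for name, eps in [('float32', FLOAT32_EPS), ('float64', FLOAT64_EPS)]:
    print(name, 'xtol ~', eps**0.5, 'gtol ~', 1e-2 * eps**0.25)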
10 changes: 7 additions & 3 deletions flamedisx/likelihood.py
@@ -235,7 +235,7 @@ def __init__(
         # Add the constraint
         if log_constraint is None:
             def log_constraint(**kwargs):
-                return 0.
+                return tf.constant(0., fd.float_type())
         self.log_constraint = log_constraint
         self.constraint_extra_args = None
@@ -521,10 +521,14 @@ def _log_likelihood(self,
                            0.)
         if dsetname == self.dsetnames[0]:
             if constraint_extra_args is None:
-                ll += self.log_constraint(**params_unstacked)
+                ll += tf.where(tf.equal(i_batch, tf.constant(0, dtype=fd.int_type())),
+                               self.log_constraint(**params_unstacked),
+                               0.)
             else:
                 kwargs = {**params_unstacked, **constraint_extra_args}
-                ll += self.log_constraint(**kwargs)
+                ll += tf.where(tf.equal(i_batch, tf.constant(0, dtype=fd.int_type())),
+                               self.log_constraint(**kwargs),
+                               0.)

         # Autodifferentiation. This is why we use tensorflow:
         grad = tf.gradients(ll, grad_par_stack)[0]
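
The _log_likelihood change above gates the constraint term on the batch index, so that when per-batch log-likelihoods are summed the constraint is counted exactly once (on i_batch == 0) rather than once per batch. A minimal standalone sketch of the same tf.where pattern, not the package's API, assuming TensorFlow with the 64-bit dtypes set in utils.py:

import tensorflow as tf

def add_constraint_once(i_batch, ll, log_constraint_value):
    # Add the constraint term only for the first batch (illustrative helper).
    return ll + tf.where(tf.equal(i_batch, tf.constant(0, dtype=tf.int64)),
                         log_constraint_value,
                         tf.constant(0., dtype=tf.float64))

# The constraint contributes for batch 0 only:
ll0 = add_constraint_once(tf.constant(0, tf.int64), tf.constant(-5., tf.float64), tf.constant(-0.3, tf.float64))
ll1 = add_constraint_once(tf.constant(1, tf.int64), tf.constant(-5., tf.float64), tf.constant(-0.3, tf.float64))
print(ll0.numpy(), ll1.numpy())   # -5.3 -5.0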
89 changes: 68 additions & 21 deletions flamedisx/non_asymptotic_inference.py
@@ -33,7 +33,7 @@ def __call__(self, mu_test, signal_source_name, guess_dict):
         bf_unconditional = self.likelihood.bestfit(guess=guess_dict, suppress_warnings=True)

         # Return the test statistic, unconditional fit and conditional fit
-        return self.evaluate(bf_unconditional, bf_conditional), bf_unconditional, bf_conditional
+        return self.evaluate(bf_unconditional, bf_conditional, signal_source_name, mu_test), bf_unconditional, bf_conditional


 @export
@@ -54,6 +54,29 @@ def evaluate(self, bf_unconditional, bf_conditional):
         return ts


+@export
+class TestStatisticQMu(TestStatistic):
+    """Evaluate the test statistic of equation 11 in https://arxiv.org/abs/1007.1727.
+    """
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def evaluate(self, bf_unconditional, bf_conditional, signal_source_name, mu_test):
+        ll_conditional = self.likelihood(**bf_conditional)
+        ll_unconditional = self.likelihood(**bf_unconditional)
+
+        ts = -2. * (ll_conditional - ll_unconditional)
+        if bf_unconditional[f'{signal_source_name}_rate_multiplier'] > mu_test:
+            return 0.
+        elif ts < 0.:
+            return 0.
+        else:
+            return ts
+
+    def p_value_asymptotic(self, ts_values):
+        return (1. - stats.norm.cdf(np.sqrt(ts_values)))
+
+
 @export
 class TestStatisticDistributions():
     """ Class to store test statistic distribution values (pass in as a list),
@@ -382,26 +405,26 @@ def toy_test_statistic_dist(self, test_stat_dists_SB, test_stat_dists_B,
         simulate_dict_SB, toy_data_SB, constraint_extra_args_SB = \
             self.sample_data_constraints(mu_test, signal_source_name, likelihood)

-        # S+B toys
-
-        # Shift the constraint in the likelihood based on the background RMs we drew
-        likelihood.set_constraint_extra_args(**constraint_extra_args_SB)
-        # Set data
-        likelihood.set_data(toy_data_SB)
-        # Create test statistic
-        test_statistic_SB = self.test_statistic(likelihood)
-        # Guesses for fit
-        guess_dict_SB = simulate_dict_SB.copy()
-        for key, value in guess_dict_SB.items():
-            if value < 0.1:
-                guess_dict_SB[key] = 0.1
-        # Evaluate test statistic
-        ts_result_SB = test_statistic_SB(mu_test, signal_source_name, guess_dict_SB)
-        # Save test statistic, and possibly fits
-        ts_values_SB.append(ts_result_SB[0])
-        if save_fits:
-            unconditional_bfs_SB.append(ts_result_SB[1])
-            conditional_bfs_SB.append(ts_result_SB[2])
+        # # S+B toys
+
+        # # Shift the constraint in the likelihood based on the background RMs we drew
+        # likelihood.set_constraint_extra_args(**constraint_extra_args_SB)
+        # # Set data
+        # likelihood.set_data(toy_data_SB)
+        # # Create test statistic
+        # test_statistic_SB = self.test_statistic(likelihood)
+        # # Guesses for fit
+        # guess_dict_SB = simulate_dict_SB.copy()
+        # for key, value in guess_dict_SB.items():
+        #     if value < 0.1:
+        #         guess_dict_SB[key] = 0.1
+        # # Evaluate test statistic
+        # ts_result_SB = test_statistic_SB(mu_test, signal_source_name, guess_dict_SB)
+        # # Save test statistic, and possibly fits
+        # ts_values_SB.append(ts_result_SB[0])
+        # if save_fits:
+        #     unconditional_bfs_SB.append(ts_result_SB[1])
+        #     conditional_bfs_SB.append(ts_result_SB[2])

         # B-only toys

@@ -667,3 +690,27 @@ def get_bands(self, conf_level=0.1, quantiles=[0, 1, -1, 2, -2],
             bands[signal_source] = these_bands

         return bands
+
+    def get_median_asymptotic(self, test_statistic, conf_level=0.1):
+        """
+        """
+        medians = dict()
+
+        # Loop over signal sources
+        for signal_source in self.signal_source_names:
+            # Get B-only test statistic distribition
+            test_stat_dists_B = self.test_stat_dists_B[signal_source]
+
+            mus = []
+            p_val_curves = []
+            # Loop over signal rate multipliers
+            for mu_test, ts_values in test_stat_dists_B.ts_dists.items():
+                mus.append(mu_test)
+                p_val_curves.append(test_statistic(None).p_value_asymptotic(ts_values))
+
+            p_val_curves = np.transpose(np.stack(p_val_curves, axis=0))
+            upper_lims = np.apply_along_axis(self.upper_lims_bands, 1, p_val_curves, mus, conf_level)
+
+            medians[signal_source] = np.quantile(np.sort(upper_lims), stats.norm.cdf(0.5))
+
+        return medians
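
A hypothetical usage sketch for the new method, not taken from the repository: `runner` stands in for whatever already-configured object of this class is built elsewhere (its construction is not shown in this diff), and the test statistic class is passed uninstantiated because get_median_asymptotic constructs it internally via test_statistic(None):

# Hypothetical: `runner` is an instance of the class gaining get_median_asymptotic.
medians = runner.get_median_asymptotic(TestStatisticQMu, conf_level=0.1)
for signal_source, median_limit in medians.items():
    print(signal_source, median_limit)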
4 changes: 2 additions & 2 deletions flamedisx/utils.py
@@ -9,8 +9,8 @@

 lgamma = tf.math.lgamma
 o = tf.newaxis
-FLOAT_TYPE = tf.float32
-INT_TYPE = tf.int32
+FLOAT_TYPE = tf.float64
+INT_TYPE = tf.int64

 # Extreme mean and standard deviations give numerical errors
 # in the beta-binomial.
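
Downstream code reads these module-level dtypes through small accessors (the likelihood.py change above uses fd.float_type()), so the switch to tf.float64/tf.int64 propagates to every tensor built from them. A minimal sketch of that pattern, assuming TensorFlow; the accessor here mirrors, but is not copied from, the package's helper:

import tensorflow as tf

FLOAT_TYPE = tf.float64
INT_TYPE = tf.int64

def float_type():
    # Illustrative accessor in the style of fd.float_type().
    return FLOAT_TYPE

x = tf.constant(0., dtype=float_type())
print(x.dtype)  # <dtype: 'float64'>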
