diff --git a/flamedisx/inference.py b/flamedisx/inference.py
index cfeb9ca82..516a7b3c4 100644
--- a/flamedisx/inference.py
+++ b/flamedisx/inference.py
@@ -15,7 +15,7 @@
 __all__ = ['LOWER_RATE_MULTIPLIER_BOUND',
            'SUPPORTED_OPTIMIZERS',
            'SUPPORTED_INTERVAL_OPTIMIZERS',
-           'FLOAT32_EPS']
+           'FLOAT64_EPS']
 
 # Setting this to 0 does work, but makes the inference rather slow
 # (at least for scipy); probably there is a relative xtol computation,
@@ -23,7 +23,7 @@
 LOWER_RATE_MULTIPLIER_BOUND = 1e-9
 
-# Floating point precision of 32-bit computations
-FLOAT32_EPS = np.finfo(np.float32).eps
+# Floating point precision of 64-bit computations
+FLOAT64_EPS = np.finfo(np.float64).eps
 
 
 class OptimizerWarning(UserWarning):
@@ -429,12 +429,12 @@ def _minimize(self):
-        # The underlying tensorflow computation has float32 precision,
+        # The underlying tensorflow computation has float64 precision,
         # so we have to adjust the precision options
         if kwargs['method'].upper() == 'TNC':
-            kwargs['options'].setdefault('accuracy', FLOAT32_EPS**0.5)
+            kwargs['options'].setdefault('accuracy', FLOAT64_EPS**0.5)
 
         # Achtung! setdefault will not kick in if user specified 'xtol' in the
         # options.
-        kwargs['options'].setdefault('xtol', FLOAT32_EPS**0.5)
-        kwargs['options'].setdefault('gtol', 1e-2 * FLOAT32_EPS**0.25)
+        kwargs['options'].setdefault('xtol', FLOAT64_EPS**0.5)
+        kwargs['options'].setdefault('gtol', 1e-2 * FLOAT64_EPS**0.25)
 
         if self.use_hessian:
             if (kwargs['method'].lower() in ('newton-cg', 'dogleg')
@@ -541,7 +541,7 @@ def _minimize(self):
             precision = kwargs['precision']
             del kwargs['precision']
         else:
-            precision = FLOAT32_EPS
+            precision = FLOAT64_EPS
 
         if self.minuit2:
             # Minuit2 changed the API; Minuit() no longer takes 'option-like'
diff --git a/flamedisx/likelihood.py b/flamedisx/likelihood.py
index a2198f383..1e160ac93 100644
--- a/flamedisx/likelihood.py
+++ b/flamedisx/likelihood.py
@@ -235,7 +235,7 @@ def __init__(
         # Add the constraint
         if log_constraint is None:
             def log_constraint(**kwargs):
-                return 0.
+                return tf.constant(0., fd.float_type())
         self.log_constraint = log_constraint
 
         self.constraint_extra_args = None
@@ -521,10 +521,14 @@ def _log_likelihood(self,
                 0.)
         if dsetname == self.dsetnames[0]:
             if constraint_extra_args is None:
-                ll += self.log_constraint(**params_unstacked)
+                ll += tf.where(tf.equal(i_batch, tf.constant(0, dtype=fd.int_type())),
+                               self.log_constraint(**params_unstacked),
+                               0.)
             else:
                 kwargs = {**params_unstacked, **constraint_extra_args}
-                ll += self.log_constraint(**kwargs)
+                ll += tf.where(tf.equal(i_batch, tf.constant(0, dtype=fd.int_type())),
+                               self.log_constraint(**kwargs),
+                               0.)
 
         # Autodifferentiation. This is why we use tensorflow:
         grad = tf.gradients(ll, grad_par_stack)[0]
diff --git a/flamedisx/non_asymptotic_inference.py b/flamedisx/non_asymptotic_inference.py
index bcec10578..312460375 100644
--- a/flamedisx/non_asymptotic_inference.py
+++ b/flamedisx/non_asymptotic_inference.py
@@ -33,7 +33,7 @@ def __call__(self, mu_test, signal_source_name, guess_dict):
         bf_unconditional = self.likelihood.bestfit(guess=guess_dict, suppress_warnings=True)
 
         # Return the test statistic, unconditional fit and conditional fit
-        return self.evaluate(bf_unconditional, bf_conditional), bf_unconditional, bf_conditional
+        return self.evaluate(bf_unconditional, bf_conditional, signal_source_name, mu_test), bf_unconditional, bf_conditional
 
 
 @export
@@ -54,6 +54,29 @@ def evaluate(self, bf_unconditional, bf_conditional):
         return ts
 
 
+@export
+class TestStatisticQMu(TestStatistic):
+    """Evaluate the test statistic of equation 11 in https://arxiv.org/abs/1007.1727.
+ """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def evaluate(self, bf_unconditional, bf_conditional, signal_source_name, mu_test): + ll_conditional = self.likelihood(**bf_conditional) + ll_unconditional = self.likelihood(**bf_unconditional) + + ts = -2. * (ll_conditional - ll_unconditional) + if bf_unconditional[f'{signal_source_name}_rate_multiplier'] > mu_test: + return 0. + elif ts < 0.: + return 0. + else: + return ts + + def p_value_asymptotic(self, ts_values): + return (1. - stats.norm.cdf(np.sqrt(ts_values))) + + @export class TestStatisticDistributions(): """ Class to store test statistic distribution values (pass in as a list), @@ -382,26 +405,26 @@ def toy_test_statistic_dist(self, test_stat_dists_SB, test_stat_dists_B, simulate_dict_SB, toy_data_SB, constraint_extra_args_SB = \ self.sample_data_constraints(mu_test, signal_source_name, likelihood) - # S+B toys - - # Shift the constraint in the likelihood based on the background RMs we drew - likelihood.set_constraint_extra_args(**constraint_extra_args_SB) - # Set data - likelihood.set_data(toy_data_SB) - # Create test statistic - test_statistic_SB = self.test_statistic(likelihood) - # Guesses for fit - guess_dict_SB = simulate_dict_SB.copy() - for key, value in guess_dict_SB.items(): - if value < 0.1: - guess_dict_SB[key] = 0.1 - # Evaluate test statistic - ts_result_SB = test_statistic_SB(mu_test, signal_source_name, guess_dict_SB) - # Save test statistic, and possibly fits - ts_values_SB.append(ts_result_SB[0]) - if save_fits: - unconditional_bfs_SB.append(ts_result_SB[1]) - conditional_bfs_SB.append(ts_result_SB[2]) + # # S+B toys + + # # Shift the constraint in the likelihood based on the background RMs we drew + # likelihood.set_constraint_extra_args(**constraint_extra_args_SB) + # # Set data + # likelihood.set_data(toy_data_SB) + # # Create test statistic + # test_statistic_SB = self.test_statistic(likelihood) + # # Guesses for fit + # guess_dict_SB = simulate_dict_SB.copy() + # for key, value in guess_dict_SB.items(): + # if value < 0.1: + # guess_dict_SB[key] = 0.1 + # # Evaluate test statistic + # ts_result_SB = test_statistic_SB(mu_test, signal_source_name, guess_dict_SB) + # # Save test statistic, and possibly fits + # ts_values_SB.append(ts_result_SB[0]) + # if save_fits: + # unconditional_bfs_SB.append(ts_result_SB[1]) + # conditional_bfs_SB.append(ts_result_SB[2]) # B-only toys @@ -667,3 +690,27 @@ def get_bands(self, conf_level=0.1, quantiles=[0, 1, -1, 2, -2], bands[signal_source] = these_bands return bands + + def get_median_asymptotic(self, test_statistic, conf_level=0.1): + """ + """ + medians = dict() + + # Loop over signal sources + for signal_source in self.signal_source_names: + # Get B-only test statistic distribition + test_stat_dists_B = self.test_stat_dists_B[signal_source] + + mus = [] + p_val_curves = [] + # Loop over signal rate multipliers + for mu_test, ts_values in test_stat_dists_B.ts_dists.items(): + mus.append(mu_test) + p_val_curves.append(test_statistic(None).p_value_asymptotic(ts_values)) + + p_val_curves = np.transpose(np.stack(p_val_curves, axis=0)) + upper_lims = np.apply_along_axis(self.upper_lims_bands, 1, p_val_curves, mus, conf_level) + + medians[signal_source] = np.quantile(np.sort(upper_lims), stats.norm.cdf(0.5)) + + return medians diff --git a/flamedisx/utils.py b/flamedisx/utils.py index d2b1a61aa..a0896572f 100644 --- a/flamedisx/utils.py +++ b/flamedisx/utils.py @@ -9,8 +9,8 @@ lgamma = tf.math.lgamma o = tf.newaxis -FLOAT_TYPE = tf.float32 
-INT_TYPE = tf.int32
+FLOAT_TYPE = tf.float64
+INT_TYPE = tf.int64
 
 # Extreme mean and standard deviations give numerical errors
 # in the beta-binomial.
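For reference, the TestStatisticQMu added above implements q_mu from arXiv:1007.1727, eq. 11: the statistic is set to zero when the unconditional best-fit signal rate exceeds the tested value (so upward fluctuations do not count against the signal hypothesis), clamped at zero against small negative values from fit noise, and its asymptotic p-value is 1 - Φ(√q_μ). A minimal standalone sketch of the same arithmetic; the helper names here are illustrative, not flamedisx API:

```python
import numpy as np
from scipy import stats

def q_mu(mu_test, mu_hat, ll_conditional, ll_unconditional):
    # q_mu (arXiv:1007.1727, eq. 11): zero when the best fit overshoots
    # the tested signal, else -2 * log likelihood ratio, clamped at zero.
    if mu_hat > mu_test:
        return 0.
    return max(0., -2. * (ll_conditional - ll_unconditional))

def p_value_asymptotic(ts_value):
    # Asymptotic p-value for q_mu: 1 - Phi(sqrt(q_mu)).
    return 1. - stats.norm.cdf(np.sqrt(ts_value))

# Toy cross-check with x ~ N(mu, 1): mu_hat = x, ln L(mu) = -0.5 * (x - mu)**2
x, mu_test = 0.3, 2.0
ts = q_mu(mu_test, x, -0.5 * (x - mu_test)**2, 0.)
print(ts, p_value_asymptotic(ts))  # 2.89, ~0.0446 (sqrt(q_mu) = 1.7)
```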
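The likelihood.py change above guards the constraint term with tf.where on i_batch, so that when the per-batch log-likelihoods of the first dataset are summed, the constraint enters exactly once rather than once per batch. A minimal sketch of that guard under this reading, with tf.int64/tf.float64 written out in place of fd.int_type()/fd.float_type() (matching the utils.py change); the helper is hypothetical, flamedisx does this inline in _log_likelihood:

```python
import tensorflow as tf

def add_constraint_once(ll_batch, i_batch, log_constraint_value):
    # Add the scalar constraint term only on batch 0, so that a sum
    # over batches counts it exactly once.
    return ll_batch + tf.where(tf.equal(i_batch, tf.constant(0, dtype=tf.int64)),
                               log_constraint_value,
                               tf.constant(0., tf.float64))

# Only the i_batch == 0 evaluation picks up the constraint:
ll = tf.constant(-10., tf.float64)
constraint = tf.constant(-0.5, tf.float64)
print(add_constraint_once(ll, tf.constant(0, tf.int64), constraint))  # -10.5
print(add_constraint_once(ll, tf.constant(1, tf.int64), constraint))  # -10.0
```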