Merge pull request #99 from LHCfitNikhef/NLO-keys
Making theory order local per dataset
jacoterh authored Nov 29, 2024
2 parents 9e1a4dd + 85e9bde commit ec2b607
Showing 18 changed files with 88 additions and 70 deletions.
3 changes: 2 additions & 1 deletion .pylintrc
@@ -130,7 +130,8 @@ disable=
global-statement,
too-many-public-method,
too-many-ancestors,
too-many-positional-arguments
too-many-positional-arguments,
too-many-statements

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
41 changes: 26 additions & 15 deletions docs/source/fitting_code/running.md
@@ -41,13 +41,14 @@ theory_path:

```
### Theory specifications
The perturbative order of the QCD theory prediction (LO or NLO) should be specified using ``order``.
``use_quad`` should be set to ``True`` for a fit with quadratic corrections, ``use_t0`` controls the use
The default perturbative order of the theory prediction is set by the key ``default_order``. Orders may also be specified
per dataset; see [here](./example.html#datasets-to-consider-and-coefficients-to-fit) for more details.
The order in the EFT expansion is specified by setting ``use_quad`` to ``True`` or ``False`` to include quadratic or only linear corrections, respectively. The option ``use_t0`` controls the use
of the ``t0`` prescription and ``use_theory_covmat`` specifies whether or not to use the theory covariance matrix
which can be specified in the theory files.
```yaml
order: NLO
default_order: LO
use_quad: False
use_t0: False
use_theory_covmat: True
@@ -91,22 +92,32 @@ n_samples: 1000 # number of the required samples of the posterior distribution
### Datasets to consider and coefficients to fit
The datasets and Wilson coefficients to be included in the analysis must be listed under ``datasets``
and ``coefficients`` respectively.
and ``coefficients`` respectively. The default order for each dataset is taken from ``default_order``. However, it is
possible to override the order for individual datasets. To do this, add the key ``order`` to the dataset entry as follows.
```yaml
datasets:

- ATLAS_tt_8TeV_ljets_Mtt
- ATLAS_tt_8TeV_dilep_Mtt
- CMS_tt_8TeV_ljets_Ytt
- CMS_tt2D_8TeV_dilep_MttYtt
- CMS_tt_13TeV_ljets_2015_Mtt
- CMS_tt_13TeV_dilep_2015_Mtt
- CMS_tt_13TeV_ljets_2016_Mtt
- CMS_tt_13TeV_dilep_2016_Mtt
- ATLAS_tt_13TeV_ljets_2016_Mtt
- ATLAS_CMS_tt_AC_8TeV
- ATLAS_tt_AC_13TeV
- name: ATLAS_tt_8TeV_ljets_Mtt
- name: ATLAS_tt_8TeV_dilep_Mtt
order: NLO_QCD
- name: CMS_tt_8TeV_ljets_Ytt
order: NLO_QCD
- name: CMS_tt2D_8TeV_dilep_MttYtt
order: NLO_QCD
- name: CMS_tt_13TeV_ljets_2015_Mtt
order: NLO_QCD
- name: CMS_tt_13TeV_dilep_2015_Mtt
order: NLO_QCD
- name: CMS_tt_13TeV_ljets_2016_Mtt
order: NLO_QCD
- name: CMS_tt_13TeV_dilep_2016_Mtt
order: NLO_QCD
- name: ATLAS_tt_13TeV_ljets_2016_Mtt
order: NLO_QCD
- name: ATLAS_CMS_tt_AC_8TeV
order: NLO_QCD
- name: ATLAS_tt_AC_13TeV
...
...

2 changes: 1 addition & 1 deletion src/smefit/analyze/pca.py
@@ -48,11 +48,11 @@ def from_dict(cls, config):
config["data_path"],
config["datasets"],
config["coefficients"],
config["order"],
config["use_quad"],
config["use_theory_covmat"],
config["use_t0"],
config.get("use_multiplicative_prescription", False),
config.get("default_order", "LO"),
config.get("theory_path", None),
config.get("rot_to_fit_basis", None),
config.get("uv_couplings", False),
1 change: 0 additions & 1 deletion src/smefit/analyze/summary.py
@@ -69,7 +69,6 @@ def fit_settings(self):
summary_dict["EFT order"] = (
"Qudratic" if fit.config["use_quad"] else "Linear"
)
summary_dict["pQCD"] = fit.config["order"]
summary_dict["Replicas"] = fit.n_replica
label = fit.label.replace(r"\ ", "").replace(r"\rm", "")
summaries[label] = summary_dict
2 changes: 1 addition & 1 deletion src/smefit/chi2.py
@@ -85,11 +85,11 @@ def __init__(self, run_card, n_replica):
run_card["data_path"],
run_card["datasets"],
run_card["coefficients"],
run_card["order"],
run_card["use_quad"],
run_card["use_theory_covmat"],
False,
self.use_multiplicative_prescription,
run_card.get("default_order", "LO"),
run_card.get("theory_path", None),
run_card.get("rot_to_fit_basis", None),
run_card.get("uv_couplings", False),
2 changes: 1 addition & 1 deletion src/smefit/fit_manager.py
@@ -117,11 +117,11 @@ def load_datasets(self):
self.config["data_path"],
self.config["datasets"],
self.config["coefficients"],
self.config["order"],
self.config["use_quad"],
self.config["use_theory_covmat"],
False, # t0 is not used here because in the report we look at the experimental chi2
self.config.get("use_multiplicative_prescription", False),
self.config.get("default_order", "LO"),
self.config.get("theory_path", None),
self.config.get("rot_to_fit_basis", None),
self.config.get("uv_couplings", False),
22 changes: 10 additions & 12 deletions src/smefit/loader.py
@@ -545,11 +545,11 @@ def load_datasets(
commondata_path,
datasets,
operators_to_keep,
order,
use_quad,
use_theory_covmat,
use_t0,
use_multiplicative_prescription,
default_order="LO",
theory_path=None,
rot_to_fit_basis=None,
has_uv_couplings=False,
@@ -565,11 +565,11 @@
commondata_path : str, pathlib.Path
path to commondata folder, commondata excluded
datasets : list
list of datasets to be loaded
List of datasets to be loaded
operators_to_keep: list
list of operators for which corrections are loaded
order: "LO", "NLO"
EFT perturbative order
default_order: str
Default perturbative order of the theory predictions
use_quad: bool
if True loads also |HO| corrections
use_theory_covmat: bool
@@ -602,18 +602,16 @@
th_cov = []

Loader.commondata_path = pathlib.Path(commondata_path)
if theory_path is not None:
Loader.theory_path = pathlib.Path(theory_path)
else:
Loader.theory_path = pathlib.Path(commondata_path)
Loader.theory_path = pathlib.Path(theory_path or commondata_path)

_logger.info(f"Applying cutoff scale: {cutoff_scale} GeV.")
for sset in np.unique(datasets):
for sset in datasets:
dataset_name = sset.get("name")

dataset = Loader(
sset,
dataset_name,
operators_to_keep,
order,
sset.get("order", default_order),
use_quad,
use_theory_covmat,
use_multiplicative_prescription,
@@ -625,7 +623,7 @@
if np.all(~dataset.mask):
continue

exp_name.append(sset)
exp_name.append(dataset_name)
n_data_exp.append(dataset.n_data)
lumi_exp.append(dataset.lumi)
exp_data.extend(dataset.central_values)
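
The core of this change is visible above in ``load_datasets``: each dataset entry in the runcard is now a mapping, and its own ``order`` key takes precedence over the global ``default_order``. A minimal standalone sketch of that fallback logic (the helper name and example entries below are illustrative, not the actual ``Loader`` API):

```python
# Sketch of the per-dataset order resolution introduced in this PR.
# `resolve_order` and the example entries are hypothetical illustrations.
def resolve_order(dataset_config: dict, default_order: str = "LO") -> str:
    """Return the dataset's own 'order' if present, otherwise the global default."""
    return dataset_config.get("order", default_order)


datasets = [
    {"name": "ATLAS_tt_AC_13TeV"},                            # no 'order' key -> falls back to default
    {"name": "ATLAS_tt_8TeV_dilep_Mtt", "order": "NLO_QCD"},  # explicit per-dataset override
]

for entry in datasets:
    print(entry["name"], resolve_order(entry, default_order="LO"))
# ATLAS_tt_AC_13TeV LO
# ATLAS_tt_8TeV_dilep_Mtt NLO_QCD
```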
2 changes: 1 addition & 1 deletion src/smefit/optimize/analytic.py
@@ -82,11 +82,11 @@ def from_dict(cls, config):
config["data_path"],
config["datasets"],
config["coefficients"],
config["order"],
False,
config["use_theory_covmat"],
config["use_t0"],
False,
config.get("default_order", "LO"),
config.get("theory_path", None),
config.get("rot_to_fit_basis", None),
config.get("uv_couplings", False),
2 changes: 1 addition & 1 deletion src/smefit/optimize/mc.py
@@ -121,11 +121,11 @@ def from_dict(cls, config):
config["data_path"],
config["datasets"],
config["coefficients"],
config["order"],
config["use_quad"],
config["use_theory_covmat"],
config["use_t0"],
config.get("use_multiplicative_prescription", False),
config.get("default_order", "LO"),
config.get("theory_path", None),
config.get("rot_to_fit_basis", None),
config.get("uv_couplings", False),
2 changes: 1 addition & 1 deletion src/smefit/optimize/ultranest.py
@@ -168,11 +168,11 @@ def from_dict(cls, config):
config["data_path"],
config["datasets"],
operators_to_keep,
config["order"],
config["use_quad"],
config["use_theory_covmat"],
config["use_t0"],
config.get("use_multiplicative_prescription", False),
config.get("default_order", "LO"),
config.get("theory_path", None),
config.get("rot_to_fit_basis", None),
config.get("uv_couplings", False),
2 changes: 1 addition & 1 deletion src/smefit/prefit/__init__.py
@@ -19,11 +19,11 @@ def __init__(self, config):
config["data_path"],
config["datasets"],
config["coefficients"],
config["order"],
config["use_quad"],
config["use_theory_covmat"],
config["use_t0"],
False,
config.get("default_order", "LO"),
config.get("theory_path", None),
config.get("rot_to_fit_basis", None),
config.get("uv_couplings", False),
20 changes: 10 additions & 10 deletions src/smefit/projections/__init__.py
@@ -19,10 +19,10 @@ def __init__(
self,
commondata_path,
theory_path,
dataset_names,
datasets,
projections_path,
coefficients,
order,
default_order,
use_quad,
use_theory_covmat,
rot_to_fit_basis,
@@ -32,10 +32,10 @@
):
self.commondata_path = commondata_path
self.theory_path = theory_path
self.dataset_names = dataset_names
self.datasets = datasets
self.projections_path = projections_path
self.coefficients = coefficients
self.order = order
self.default_order = default_order
self.use_quad = use_quad
self.use_theory_covmat = use_theory_covmat
self.rot_to_fit_basis = rot_to_fit_basis
@@ -45,13 +45,13 @@ def __init__(

self.datasets = load_datasets(
self.commondata_path,
self.dataset_names,
self.datasets,
self.coefficients,
self.order,
self.use_quad,
self.use_theory_covmat,
self.use_t0,
False,
self.default_order,
theory_path=self.theory_path,
)

@@ -82,10 +82,10 @@ def from_config(cls, projection_card):
projections_path = pathlib.Path(
projection_config["projections_path"]
).absolute()
dataset_names = projection_config["datasets"]
datasets = projection_config["datasets"]

coefficients = projection_config.get("coefficients", [])
order = projection_config.get("order", "LO")
default_order = projection_config.get("default_order", "LO")
use_quad = projection_config.get("use_quad", False)
use_theory_covmat = projection_config.get("use_theory_covmat", True)
rot_to_fit_basis = projection_config.get("rot_to_fit_basis", None)
@@ -98,10 +98,10 @@
return cls(
commondata_path,
theory_path,
dataset_names,
datasets,
projections_path,
coefficients,
order,
default_order,
use_quad,
use_theory_covmat,
rot_to_fit_basis,
7 changes: 4 additions & 3 deletions src/smefit/rge.py
@@ -379,11 +379,12 @@ def load_scales(datasets, theory_path, default_scale=1e3, cutoff_scale=None):
list of scales for the datasets
"""
scales = []
for dataset in np.unique(datasets):
for dataset in datasets:

Loader.theory_path = pathlib.Path(theory_path)
# dummy call just to get the scales
_, _, _, _, dataset_scales = Loader.load_theory(
dataset,
dataset.get("name"),
operators_to_keep={},
order="LO",
use_quad=False,
@@ -397,7 +398,7 @@ def load_scales(datasets, theory_path, default_scale=1e3, cutoff_scale=None):
else:
scales.extend([default_scale] * len(dataset_scales))

_logger.info(f"Loaded scales for dataset {dataset}: {dataset_scales}")
_logger.info(f"Loaded scales for dataset {dataset['name']}: {dataset_scales}")

if cutoff_scale is not None:
scales = [scale for scale in scales if scale < cutoff_scale]
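
Note that the ``np.unique`` call was dropped here (and in ``loader.py`` above), presumably because the dataset entries are now dicts rather than plain strings: ``np.unique`` sorts its input, and Python dicts do not support ordering, so the call would raise a ``TypeError``. A small sketch of the failure mode and of an explicit de-duplication by name, under the assumption that entries are plain dicts:

```python
import numpy as np

# Hypothetical runcard entries in the new per-dataset format.
entries = [
    {"name": "data_test5", "order": "NLO"},
    {"name": "data_test5", "order": "NLO"},
]

try:
    np.unique(entries)  # dicts cannot be compared with '<', so sorting fails
except TypeError as err:
    print("np.unique is no longer applicable:", err)

# If de-duplication were still needed, it would have to be done explicitly by name:
seen, unique_entries = set(), []
for entry in entries:
    if entry["name"] not in seen:
        seen.add(entry["name"])
        unique_entries.append(entry)
print(unique_entries)
```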
4 changes: 2 additions & 2 deletions tests/fake_results/fake_results.yaml
@@ -10,14 +10,14 @@ data_path: ./tests/fake_data
theory_path: ./tests/fake_data

use_quad: False
order: NLO
use_theory_covmat: True


# Datasets to include
datasets:

- data_test5
- name: data_test5
order: "NLO"

# Coefficients to fit
coefficients:
6 changes: 2 additions & 4 deletions tests/test_fisher.py
@@ -71,9 +71,8 @@ def test_fisher():
operators_to_keep = np.array(["Op1", "Op2", "Op3"])
dataset = load_datasets(
commondata_path,
datasets=["data_test5"],
datasets=[{"name": "data_test5", "order": "NLO"}],
operators_to_keep=operators_to_keep,
order="NLO",
use_quad=use_quad,
use_theory_covmat=True,
use_t0=False,
@@ -86,9 +85,8 @@
"result_path": None,
"result_ID": None,
"data_path": commondata_path,
"datasets": ["data_test5"],
"datasets": [{"name": "data_test5", "order": "NLO"}],
"coefficients": coefficients_dict,
"order": "NLO",
"use_theory_covmat": True,
"theory_path": commondata_path,
"use_multiplicative_prescription": True,