NNPDF · peterkrack · Dec 8, 2024 · Dec 8, 2024 · Dec 11, 2024 · Jan 8, 2025
diff --git a/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/data_reimplemented_PXSEC.yaml b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/data_reimplemented_PXSEC.yaml
@@ -0,0 +1,120 @@
+data_central:
+- 364.0
+- 209.0
+- 220.0
+- 243.0
+- 119.0
+- 176.0
+- 174.0
+- 140.0
+- 105.0
+- 123.0
+- 34.9
+- 28.9
+- 27.4
+- 16.2
+- 10.7
+- 3.57
+- 1.7
+- 399.0
+- 315.0
+- 277.0
+- 244.0
+- 237.0
+- 192.0
+- 166.0
+- 161.0
+- 145.0
+- 127.0
+- 45.6
+- 28.9
+- 30.9
+- 19.9
+- 12.5
+- 5.8
+- 1.79
+- 0.474
+- 424.0
+- 350.0
+- 363.0
+- 248.0
+- 208.0
+- 212.0
+- 148.0
+- 144.0
+- 143.0
+- 114.0
+- 39.7
+- 33.1
+- 27.4
+- 21.6
+- 15.1
+- 6.05
+- 1.86
+- 0.404
+- 431.0
+- 347.0
+- 386.0
+- 274.0
+- 294.0
+- 223.0
+- 169.0
+- 137.0
+- 137.0
+- 126.0
+- 46.0
+- 37.0
+- 30.2
+- 23.1
+- 14.7
+- 7.0
+- 2.24
+- 0.495
+- 560.0
+- 405.0
+- 315.0
+- 274.0
+- 318.0
+- 214.0
+- 162.0
+- 159.0
+- 135.0
+- 133.0
+- 57.1
+- 41.1
+- 34.5
+- 25.1
+- 16.4
+- 6.43
+- 2.2
+- 0.56
+- 419.0
+- 396.0
+- 331.0
+- 377.0
+- 401.0
+- 193.0
+- 153.0
+- 151.0
+- 134.0
+- 149.0
+- 59.5
+- 40.3
+- 31.3
+- 23.2
+- 15.9
+- 6.3
+- 2.38
+- 0.607
+- 198.0
+- 173.0
+- 149.0
+- 98.6
+- 71.1
+- 28.7
+- 36.8
+- 24.7
+- 14.6
+- 6.14
+- 1.95
+- 0.423
diff --git a/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/filter.py b/nnpdf_data/nnpdf_data/commondata/DYE605_Z0_38P8GEV_DW/filter.py
@@ -0,0 +1,100 @@
+from dataclasses import dataclass
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import yaml
+
+from nnpdf_data.filter_utils.hera_utils import commondata
+from nnpdf_data.filter_utils.utils import prettify_float
+
+yaml.add_representer(float, prettify_float)  # dump Python floats via prettify_float
+
+
+def mergetables() -> pd.DataFrame:
+    """Merge rawdata/Table1.csv..Table7.csv into one frame, adding ``y`` and ``sqrts`` columns.
+
+    Rows whose ``dsig`` is missing or not numeric are dropped, and the first column
+    (sqrt(tau) in the raw tables) is converted to M2 = tau * s before returning.
+    """
+    sqrts = 38.8
+    # Rapidity bins for tables 1 to 7, in file order.
+    yrap = [-0.2, -0.1, 0.0, 0.1, 0.2, 0.3, 0.4]
+    col_names = ["M2", "dsig", "statp", "statm", "normp", "normm", "sysp", "sysm"]
+
+    frames = []
+    for i, y in enumerate(yrap, start=1):
+        df = pd.read_csv(Path(f"./rawdata/Table{i}.csv"), header=11, names=col_names)
+        df["y"] = y
+        df["sqrts"] = sqrts
+        frames.append(df[pd.to_numeric(df["dsig"], errors="coerce").notnull()])
+
+    # Concatenate once: seeding pd.concat with an empty frame is deprecated in
+    # pandas and makes the result dtypes depend on the pandas version.
+    combined_df = pd.concat(frames, ignore_index=True)
+    # In the table we have sqrt(tau) not M2; compute M2=tau*s
+    combined_df["M2"] = (combined_df["M2"] * sqrts) ** 2
+    return combined_df
+
+
+def nuclear_uncert_dw(tableN: Path, tablep: Path):
+    """Load both files with ``pd.read_table`` and return the frames as ``(tableN, tablep)``."""
+    nuclear_frame, proton_frame = (pd.read_table(source) for source in (tableN, tablep))
+    return nuclear_frame, proton_frame
+
+
+@dataclass
+class E605_commondata(commondata):
+    """E605 central values, kinematics and uncertainties built from the merged tables.
+
+    Systematics: 10% ADD/UNCORR, the ``normp`` percentage as MULT/CORR, and one ADD
+    column per replica taken from the two ``group_result_table.csv`` files.
+    """
+
+    def __init__(self, data: pd.DataFrame, dataset_name: str, process: str):
+        # Kinematic quantities.
+        self.central_values = data["dsig"].astype(float).to_numpy()
+        self.kinematics = data[["y", "M2", "sqrts"]].astype(float).to_numpy()
+        self.kinematic_quantities = ["y", "M2", "sqrts"]
+
+        # Statistical uncertainties.
+        self.statistical_uncertainties = data["statp"]
+
+        # Column 0 is 10% of the central value; "norm" is the ``normp`` percentage.
+        syst = pd.DataFrame(0.1 * self.central_values)
+        syst["norm"] = self.central_values * data["normp"].str.strip("%").astype(float) / 100
+        self.systypes = [("ADD", "UNCORR"), ("MULT", "CORR")]
+
+        # Compute the point-to-point uncertainties
+        nrep = 999
+        norm = np.sqrt(nrep)
+        dfN, dfp = nuclear_uncert_dw(
+            Path("rawdata/nuclear/output/tables/group_result_table.csv"),
+            Path("rawdata/proton_ite/output/tables/group_result_table.csv"),
+        )
+        theory = dfp["theory_central"]
+        labels = [f"{rep:05d}" for rep in range(1, nrep + 1)]
+        shifts = {f"NUCLEAR{r}": (dfN[f"rep_{r}"] - theory) / norm for r in labels}
+        # Attach every replica column at once (999 single inserts fragment the frame);
+        # reindex() keeps the index alignment that ``syst[name] = series`` performs.
+        syst = pd.concat([syst, pd.DataFrame(shifts).reindex(syst.index)], axis=1)
+        self.systypes.extend(("ADD", name) for name in shifts)
+
+        self.systematic_uncertainties = syst.to_numpy()
+        self.process = process
+        self.dataset_name = dataset_name
+
+
+def main():
+    """Merge the raw tables and pass the three output paths to ``write_new_commondata``."""
+    merged = mergetables()
+    # NOTE(review): E605_commondata.__init__ always appends the NUCLEAR* columns.
+    dataset = E605_commondata(merged, "DYE605_Z0_38P8GEV", "Z0")
+    data_file = Path("data_reimplemented_PXSEC.yaml")
+    kinematics_file = Path("kinematics_reimplemented_PXSEC.yaml")
+    uncertainties_file = Path("uncertainties_reimplemented_PXSEC.yaml")
+    dataset.write_new_commondata(data_file, kinematics_file, uncertainties_file)
+
+
+if __name__ == "__main__":  # run the filter only when executed as a script
+    main()