Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Scale plot #111

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion src/smefit/analyze/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
_logger = logging.getLogger(__name__)


class Report:

Check notice on line 22 in src/smefit/analyze/report.py

View check run for this annotation

codefactor.io / CodeFactor

src/smefit/analyze/report.py#L22

Too many instance attributes (8/7) (too-many-instance-attributes)
r"""Report class manager.

If :math:`\chi^2`, Fisher or Data vs Theory plots are produced it computes the
Expand Down Expand Up @@ -75,6 +75,11 @@
for fit in self.fits:
self.dataset_fits.append([data["name"] for data in fit.config["datasets"]])

# Get scales for each fit
self.data_scales = []
for fit in self.fits:
self.data_scales.append(fit.load_data_scales())

# Loads useful information about data
self.data_info = self._load_grouped_data_info(report_config["data_info"])
# Loads coefficients grouped with latex name
Expand Down Expand Up @@ -144,7 +149,9 @@

def summary(self):
"""Summary Table runner."""
summary = SummaryWriter(self.fits, self.data_info, self.coeff_info)
summary = SummaryWriter(
self.fits, self.data_info, self.coeff_info, self.data_scales
)
section_title = "Summary"
coeff_tab = "coefficient_summary"
data_tab = "dataset_summary"
Expand All @@ -159,6 +166,13 @@
tables=summary.fit_settings(),
)

figs_list = []
for fit in self.fits:
_logger.info(f"Plotting scales for: {fit.name}")
figs_list.append(f"scales_{fit.name}")
summary.plot_data_scales(path=f"{self.report}")
self._append_section("Scales", figs=figs_list)

def chi2(self, table=True, plot_experiment=None, plot_distribution=None):
r""":math:`\chi^2` table and plots runner.

Expand Down
137 changes: 136 additions & 1 deletion src/smefit/analyze/summary.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import BoundaryNorm

from ..coefficients import Coefficient
from .latex_tools import latex_packages, multicolum_table_header
Expand Down Expand Up @@ -49,13 +52,15 @@

"""

def __init__(self, fits, data_groups, coeff_config):
def __init__(self, fits, data_groups, coeff_config, data_scales):
self.fits = fits
self.data_info = data_groups
self.coeff_info = coeff_config
self.nfits = len(self.fits)
# Get names of datasets for each fit
self.dataset_fits = []
self.data_scales = data_scales

for fit in self.fits:
self.dataset_fits.append([data["name"] for data in fit.config["datasets"]])

Expand Down Expand Up @@ -198,3 +203,133 @@
]
)
return L

def plot_data_scales(self, path):
    """Plot, for each fit, a heatmap of the datapoint energy scales.

    Data groups are shown on the y-axis and logarithmically binned
    scales (in GeV) on the x-axis; each cell is annotated with the
    number of datapoints of that group falling in that scale bin.
    One figure per fit is saved as ``scales_<fit name>.pdf`` and
    ``scales_<fit name>.png``.

    Parameters
    ----------
    path : str
        folder where the figures are saved.
    """
    # Preferred ordering of the data groups on the y-axis; groups not
    # listed here are sorted to the bottom, keeping their original
    # relative order. Hoisted out of the per-fit loop (loop-invariant,
    # also avoids the cell-var-from-loop lambda capture).
    order = [
        r"$\bar{t}t\bar{t}t + \bar{t}t\bar{b}b$",
        r"$\rm Higgs$",
        r"$\rm LEP$",
        r"$\bar{t}t$",
        r"$\bar{t}tV$",
        r"$t$",
        r"$tV$",
        r"$VV$",
        r"$\mathrm{FCC\textnormal{-}ee\:91\:GeV}$",
        r"$\mathrm{FCC\textnormal{-}ee\:161\:GeV}$",
        r"$\mathrm{FCC\textnormal{-}ee\:240\:GeV}$",
        r"$\mathrm{FCC\textnormal{-}ee\:365\:GeV}$",
    ]
    order_index = {name: i for i, name in enumerate(order)}

    # Collect the scales for each dataset, concatenated per data group,
    # for every fit.
    fits_datagroup_scales = []
    for fit in self.data_scales:
        fit_scales = {}
        for group, datasets in self.data_info.groupby(level=0):
            fit_scales[group] = np.array([])
            datasets = datasets.droplevel(0)
            for dataset, _ in datasets.items():
                # concatenate the scales for each dataset in the group
                fit_scales[group] = np.concatenate(
                    (fit_scales[group], fit[dataset])
                )
        fits_datagroup_scales.append(fit_scales)

    # One heatmap per fit: groups on the y-axis, scale bins on the
    # x-axis, the cell colour/annotation giving the datapoint count.
    for i, fit_scales in enumerate(fits_datagroup_scales):
        raw_min = min(min(scales) for scales in fit_scales.values())
        raw_max = max(max(scales) for scales in fit_scales.values())
        bins = np.logspace(
            np.log10(raw_min),
            np.log10(raw_max),
            21,
        )

        # Round bin edges to 10 GeV below 300 GeV, to 100 GeV above
        bins = np.where(
            bins < 300, np.round(bins / 10) * 10, np.round(bins / 100) * 100
        )

        # Rounding may shrink the range: stretch the outer edges back
        # out so every scale falls inside a bin.
        if bins[0] > raw_min:
            bins[0] = (
                np.floor(raw_min / 10) * 10
                if raw_min < 300
                else np.floor(raw_min / 100) * 100
            )
        if bins[-1] < raw_max:
            bins[-1] = (
                np.ceil(raw_max / 10) * 10
                if raw_max < 300
                else np.ceil(raw_max / 100) * 100
            )

        # Sort group names by their preferred index, unmatched names
        # (np.inf) stay in original order at the end.
        sorted_group_names = sorted(
            fit_scales.keys(),
            key=lambda x: order_index.get(x, np.inf),
        )

        # Histogram each group's scales into the common bins.
        heatmap_data = np.array(
            [
                np.histogram(fit_scales[group], bins=bins)[0]
                for group in sorted_group_names
            ]
        )

        # Leave zero-count cells unannotated.
        annot_data = np.where(heatmap_data == 0, "", heatmap_data)

        # Discrete colorbar: fine boundaries at low counts (0, 1, 2, 5),
        # then steps of 10 up to the maximum count.
        boundaries = np.concatenate(
            [np.array([0, 1, 2, 5]), np.arange(10, heatmap_data.max() + 10, 10)]
        )
        norm = BoundaryNorm(boundaries, ncolors=256)

        fig, ax = plt.subplots(figsize=(10, 6))
        heatmap = sns.heatmap(
            heatmap_data,
            annot=annot_data,
            fmt="",
            cmap="Blues",
            ax=ax,
            xticklabels=[f"{int(bins[i + 1])}" for i in range(len(bins) - 1)],
            yticklabels=sorted_group_names,
            cbar_kws={
                "ticks": boundaries,
            },
            norm=norm,
        )

        cbar = heatmap.collections[0].colorbar
        cbar.set_label("\\# of Data points", fontsize=14)

        # Re-align the x-ticks with the bin edges (seaborn centres them
        # on the cells by default).
        xtick_positions = list(range(len(bins)))
        ax.set_xticks(xtick_positions)
        ax.set_xticklabels([f"{int(bins[i])}" for i in range(len(bins))])

        ax.set_title(f"Data Scales for {self.fits[i].label}", fontsize=16)
        ax.set_xlabel(
            "Scales [GeV]",
            fontsize=14,
        )
        fig.tight_layout()

        # Save the heatmap
        fig.savefig(f"{path}/scales_{self.fits[i].name}.pdf")
        fig.savefig(f"{path}/scales_{self.fits[i].name}.png")
        # Release the figure: without this every fit leaks an open
        # matplotlib figure for the lifetime of the report run.
        plt.close(fig)

Check notice on line 335 in src/smefit/analyze/summary.py

View check run for this annotation

codefactor.io / CodeFactor

src/smefit/analyze/summary.py#L207-L335

Complex Method
22 changes: 22 additions & 0 deletions src/smefit/fit_manager.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# -*- coding: utf-8 -*-
import json
import pathlib

import numpy as np
import pandas as pd
import yaml
from rich.progress import track

from smefit.loader import Loader

from .coefficients import CoefficientManager
from .compute_theory import make_predictions
from .loader import load_datasets
Expand Down Expand Up @@ -128,6 +131,25 @@ def load_datasets(self):
self.config.get("external_chi2", False),
)

def load_data_scales(self):
    """Load the energy scales of every dataset entering the fit.

    Returns
    -------
    dict
        mapping ``dataset name -> scales``, where the scales are the
        last element returned by :meth:`Loader.load_theory`.
    """
    # theory_path is the same for every dataset: set it once, outside
    # the loop, instead of re-assigning it per dataset.
    Loader.theory_path = pathlib.Path(self.config["theory_path"])
    scales = {}
    for data in self.config["datasets"]:
        dataset = data["name"]
        # dummy call just to get the scales: the operator content,
        # perturbative order and covariance options are irrelevant here
        _, _, _, _, dataset_scales = Loader.load_theory(
            dataset,
            operators_to_keep={},
            order="LO",
            use_quad=False,
            use_theory_covmat=False,
            use_multiplicative_prescription=False,
        )
        scales[dataset] = dataset_scales

    return scales

@property
def smeft_predictions(self):
"""Compute |SMEFT| predictions for each replica.
Expand Down
Loading