From a03ad7b9fac17b041b1b293acaa506e1fc882738 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Wed, 29 Jan 2025 14:48:06 -0500 Subject: [PATCH 01/16] chore: move storage out of internal Signed-off-by: Henry Schreiner --- src/boost_histogram/_internal/hist.py | 2 +- src/boost_histogram/_internal/storage.py | 66 -------------------- src/boost_histogram/storage.py | 76 ++++++++++++++++++++---- 3 files changed, 64 insertions(+), 80 deletions(-) delete mode 100644 src/boost_histogram/_internal/storage.py diff --git a/src/boost_histogram/_internal/hist.py b/src/boost_histogram/_internal/hist.py index 04d8c4da..5a5e56cc 100644 --- a/src/boost_histogram/_internal/hist.py +++ b/src/boost_histogram/_internal/hist.py @@ -27,10 +27,10 @@ import boost_histogram from boost_histogram import _core +from ..storage import Double, Storage from .axestuple import AxesTuple from .axis import Axis, Variable from .enum import Kind -from .storage import Double, Storage from .typing import Accumulator, ArrayLike, CppHistogram from .utils import cast, register, set_module from .view import MeanView, WeightedMeanView, WeightedSumView, _to_view diff --git a/src/boost_histogram/_internal/storage.py b/src/boost_histogram/_internal/storage.py deleted file mode 100644 index a6c792fe..00000000 --- a/src/boost_histogram/_internal/storage.py +++ /dev/null @@ -1,66 +0,0 @@ -from __future__ import annotations - -from typing import ClassVar - -import boost_histogram - -from .._core import accumulators # pylint: disable=no-name-in-module -from .._core import storage as store # pylint: disable=no-name-in-module -from .utils import set_module - - -# Simple mixin to provide a common base class for types -class Storage: - _family: object - - def __init_subclass__(cls, *, family: object) -> None: - super().__init_subclass__() - cls._family = family - - def __repr__(self) -> str: - return f"{self.__class__.__name__}()" - - accumulator: ClassVar[ - ( - type[int] - | type[float] - | type[accumulators.WeightedMean] - | type[accumulators.WeightedSum] - | type[accumulators.Mean] - ) - ] - - -@set_module("boost_histogram.storage") -class Int64(store.int64, Storage, family=boost_histogram): - accumulator = int - - -@set_module("boost_histogram.storage") -class Double(store.double, Storage, family=boost_histogram): - accumulator = float - - -@set_module("boost_histogram.storage") -class AtomicInt64(store.atomic_int64, Storage, family=boost_histogram): - accumulator = int - - -@set_module("boost_histogram.storage") -class Unlimited(store.unlimited, Storage, family=boost_histogram): - accumulator = float - - -@set_module("boost_histogram.storage") -class Weight(store.weight, Storage, family=boost_histogram): - accumulator = accumulators.WeightedSum - - -@set_module("boost_histogram.storage") -class Mean(store.mean, Storage, family=boost_histogram): - accumulator = accumulators.Mean - - -@set_module("boost_histogram.storage") -class WeightedMean(store.weighted_mean, Storage, family=boost_histogram): - accumulator = accumulators.WeightedMean diff --git a/src/boost_histogram/storage.py b/src/boost_histogram/storage.py index b450f5d2..4e1edffd 100644 --- a/src/boost_histogram/storage.py +++ b/src/boost_histogram/storage.py @@ -1,17 +1,13 @@ from __future__ import annotations -from ._internal.storage import ( - AtomicInt64, - Double, - Int64, - Mean, - Storage, - Unlimited, - Weight, - WeightedMean, -) - -__all__ = ( +from typing import ClassVar + +import boost_histogram + +from ._core import accumulators # pylint: disable=no-name-in-module +from ._core import storage as store # pylint: disable=no-name-in-module + +__all__ = [ "AtomicInt64", "Double", "Int64", @@ -20,4 +16,58 @@ "Unlimited", "Weight", "WeightedMean", -) +] + + +def __dir__() -> list[str]: + return __all__ + + +# Simple mixin to provide a common base class for types +class Storage: + _family: object + + def __init_subclass__(cls, *, family: object) -> None: + super().__init_subclass__() + cls._family = family + + def __repr__(self) -> str: + return f"{self.__class__.__name__}()" + + accumulator: ClassVar[ + ( + type[int] + | type[float] + | type[accumulators.WeightedMean] + | type[accumulators.WeightedSum] + | type[accumulators.Mean] + ) + ] + + +class Int64(store.int64, Storage, family=boost_histogram): + accumulator = int + + +class Double(store.double, Storage, family=boost_histogram): + accumulator = float + + +class AtomicInt64(store.atomic_int64, Storage, family=boost_histogram): + accumulator = int + + +class Unlimited(store.unlimited, Storage, family=boost_histogram): + accumulator = float + + +class Weight(store.weight, Storage, family=boost_histogram): + accumulator = accumulators.WeightedSum + + +class Mean(store.mean, Storage, family=boost_histogram): + accumulator = accumulators.Mean + + +class WeightedMean(store.weighted_mean, Storage, family=boost_histogram): + accumulator = accumulators.WeightedMean From 5c2a6d72b7d606e757a57ec764fbaff66f337ee3 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Wed, 29 Jan 2025 15:20:37 -0500 Subject: [PATCH 02/16] chore: move AxesTuple out of _internal Signed-off-by: Henry Schreiner --- src/boost_histogram/_internal/axis.py | 758 ----------------- src/boost_histogram/_internal/hist.py | 3 +- src/boost_histogram/axis/__init__.py | 765 +++++++++++++++++- .../axestuple.py => axis/_axes_tuple.py} | 6 +- 4 files changed, 757 insertions(+), 775 deletions(-) delete mode 100644 src/boost_histogram/_internal/axis.py rename src/boost_histogram/{_internal/axestuple.py => axis/_axes_tuple.py} (96%) diff --git a/src/boost_histogram/_internal/axis.py b/src/boost_histogram/_internal/axis.py deleted file mode 100644 index 0abf4e09..00000000 --- a/src/boost_histogram/_internal/axis.py +++ /dev/null @@ -1,758 +0,0 @@ -from __future__ import annotations - -import copy -from typing import Any, Callable, Iterable, Iterator, TypeVar, Union - -import numpy as np # pylint: disable=unused-import - -import boost_histogram - -from .._core import axis as ca -from .axis_transform import AxisTransform -from .traits import Traits -from .utils import cast, register, set_module - - -def _isstr(value: Any) -> bool: - """ - Check to see if this is a stringlike or a (nested) iterable of stringlikes - """ - - if isinstance(value, (str, bytes)): - return True - if hasattr(value, "__iter__"): - return all(_isstr(v) for v in value) - return False - - -def _opts(**kwargs: bool) -> set[str]: - return {k for k, v in kwargs.items() if v} - - -AxCallOrInt = Union[int, Callable[["Axis"], int]] - - -T = TypeVar("T", bound="Axis") - - -# Contains common methods and properties to all axes -@set_module("boost_histogram.axis") -class Axis: - __slots__ = ("__dict__", "_ax") - _family: object - - def __init_subclass__(cls, *, family: object) -> None: - super().__init_subclass__() - cls._family = family - - def __setattr__(self, attr: str, value: Any) -> None: - if attr == "__dict__": - self._ax.metadata = value - object.__setattr__(self, attr, value) - - def __getattr__(self, attr: str) -> Any: - if attr == "metadata": - return - raise AttributeError( - f"object {self.__class__.__name__} has no attribute {attr}" - ) - - def __init__( - self, - ax: Any, - metadata: dict[str, Any] | None, - __dict__: dict[str, Any] | None, - ) -> None: - """ - ax: the C++ object - metadata: the metadata keyword contents - __dict__: the __dict__ keyword contents - """ - - self._ax = ax - - if __dict__ is not None and metadata is not None: - raise KeyError( - "Cannot provide metadata by keyword and __dict__, use __dict__ only" - ) - if __dict__ is not None: - self._ax.metadata = __dict__ - elif metadata is not None: - self._ax.metadata["metadata"] = metadata - - self.__dict__ = self._ax.metadata - - def __setstate__(self, state: dict[str, Any]) -> None: - self._ax = state["_ax"] - self.__dict__ = self._ax.metadata - - def __getstate__(self) -> dict[str, Any]: - return {"_ax": self._ax} - - def __copy__(self: T) -> T: - other: T = self.__class__.__new__(self.__class__) - other._ax = copy.copy(self._ax) - other.__dict__ = other._ax.metadata - return other - - def index(self, value: float | str) -> int: - """ - Return the fractional index(es) given a value (or values) on the axis. - """ - - if _isstr(value): - msg = f"index({value}) cannot be a string for a numerical axis" - raise TypeError(msg) - - return self._ax.index(value) # type: ignore[no-any-return] - - def value(self, index: float) -> float: - """ - Return the value(s) given an (fractional) index (or indices). - """ - - return self._ax.value(index) # type: ignore[no-any-return] - - def bin(self, index: float) -> int | str | tuple[float, float]: - """ - Return the edges of the bins as a tuple for a - continuous axis or the bin value for a - non-continuous axis, when given an index. - """ - - return self._ax.bin(index) # type: ignore[no-any-return] - - def __eq__(self, other: Any) -> bool: - return hasattr(other, "_ax") and self._ax == other._ax - - def __ne__(self, other: Any) -> bool: - return (not hasattr(other, "_ax")) or self._ax != other._ax - - @classmethod - def _convert_cpp(cls: type[T], cpp_object: Any) -> T: - nice_ax: T = cls.__new__(cls) - nice_ax._ax = cpp_object - nice_ax.__dict__ = cpp_object.metadata - return nice_ax - - def __len__(self) -> int: - return self._ax.size # type: ignore[no-any-return] - - def __iter__( - self, - ) -> Iterator[float] | Iterator[str] | Iterator[tuple[float, float]]: - return self._ax.__iter__() # type: ignore[no-any-return] - - def _process_loc( - self, start: AxCallOrInt | None, stop: AxCallOrInt | None - ) -> tuple[int, int]: - """ - Compute start and stop into actual start and stop values in Boost.Histogram. - None -> -1 or 0 for start, -> len or len+1 for stop. If start or stop are - callable, then call them with the axes. - - For a non-ordered axes, flow is all or nothing, so this will ensure overflow - is turned off if underflow is not None. - """ - - def _process_internal(item: AxCallOrInt | None, default: int) -> int: - return default if item is None else item(self) if callable(item) else item - - underflow = -1 if self._ax.traits_underflow else 0 - overflow = 1 if self._ax.traits_overflow else 0 - - # Non-ordered axes only use flow if integrating from None to None - if not self._ax.traits_ordered and not (start is None and stop is None): - overflow = 0 - - begin = _process_internal(start, underflow) - end = _process_internal(stop, len(self) + overflow) - - return begin, end - - def __repr__(self) -> str: - arg_str = ", ".join(self._repr_args_()) - return f"{self.__class__.__name__}({arg_str})" - - def _repr_args_(self) -> list[str]: - """ - Return arg options for use in the repr as strings. - """ - - ret = [] - if self.metadata is not None: - if isinstance(self.metadata, str): - ret.append(f"metadata={self.metadata!r}") - else: - ret.append("metadata=...") - return ret - - @property - def traits(self) -> Traits: - """ - Get traits for the axis - read only properties of a specific axis. - """ - return Traits( - self._ax.traits_underflow, - self._ax.traits_overflow, - self._ax.traits_circular, - self._ax.traits_growth, - self._ax.traits_continuous, - self._ax.traits_ordered, - ) - - @property - def size(self) -> int: - """ - Return number of bins excluding under- and overflow. - """ - return self._ax.size # type: ignore[no-any-return] - - @property - def extent(self) -> int: - """ - Return number of bins including under- and overflow. - """ - return self._ax.extent # type: ignore[no-any-return] - - def __getitem__(self, i: AxCallOrInt) -> int | str | tuple[float, float]: - """ - Access a bin, using normal Python syntax for wraparound. - """ - # UHI support - if callable(i): - i = i(self) - else: - if i < 0: - i += self._ax.size - if i >= self._ax.size: - raise IndexError( - f"Out of range access, {i} is more than {self._ax.size}" - ) - assert not callable(i) - return self.bin(i) - - @property - def edges(self) -> np.typing.NDArray[Any]: - return self._ax.edges - - @property - def centers(self) -> np.typing.NDArray[Any]: - """ - An array of bin centers. - """ - return self._ax.centers - - @property - def widths(self) -> np.typing.NDArray[Any]: - """ - An array of bin widths. - """ - return self._ax.widths - - -# Contains all common methods and properties for Regular axes -@register( - { - ca.regular_uoflow, - ca.regular_uoflow_growth, - ca.regular_uflow, - ca.regular_oflow, - ca.regular_none, - ca.regular_pow, - ca.regular_trans, - ca.regular_circular, - } -) -@set_module("boost_histogram.axis") -class Regular(Axis, family=boost_histogram): - __slots__ = () - - def __init__( - self, - bins: int, - start: float, - stop: float, - *, - metadata: Any = None, - underflow: bool = True, - overflow: bool = True, - growth: bool = False, - circular: bool = False, - transform: AxisTransform | None = None, - __dict__: dict[str, Any] | None = None, - ): - """ - Make a regular axis with nice keyword arguments for underflow, - overflow, and growth. - - Parameters - ---------- - bins : int - The number of bins between start and stop - start : float - The beginning value for the axis - stop : float - The ending value for the axis - metadata : Any - Fills .metadata on the axis. - underflow : bool = True - Enable the underflow bin - overflow : bool = True - Enable the overflow bin - growth : bool = False - Allow the axis to grow if a value is encountered out of range. - Be careful, the axis will grow as large as needed. - circular : bool = False - Filling wraps around. - transform : Optional[AxisTransform] = None - Transform the regular bins (Log, Sqrt, and Pow(v)) - __dict__: Optional[Dict[str, Any]] = None - The full metadata dictionary - """ - - options = _opts( - underflow=underflow, overflow=overflow, growth=growth, circular=circular - ) - - ax: ca._BaseRegular - - if transform is not None: - if options != {"underflow", "overflow"}: - raise KeyError("Transform supplied, cannot change other options") - - if ( - not isinstance(transform, AxisTransform) - and AxisTransform in transform.__bases__ # type: ignore[unreachable] - ): - raise TypeError(f"You must pass an instance, use {transform}()") - - ax = transform._produce(bins, start, stop) - - elif options == {"growth", "underflow", "overflow"}: - ax = ca.regular_uoflow_growth(bins, start, stop) - elif options == {"underflow", "overflow"}: - ax = ca.regular_uoflow(bins, start, stop) - elif options == {"underflow"}: - ax = ca.regular_uflow(bins, start, stop) - elif options == {"overflow"}: - ax = ca.regular_oflow(bins, start, stop) - elif options in ( - {"circular", "underflow", "overflow"}, - {"circular", "overflow"}, - ): - # growth=True, underflow=False is also correct - ax = ca.regular_circular(bins, start, stop) - - elif options == set(): - ax = ca.regular_none(bins, start, stop) - else: - raise KeyError("Unsupported collection of options") - - super().__init__(ax, metadata, __dict__) - - def _repr_args_(self) -> list[str]: - "Return inner part of signature for use in repr" - - ret = [f"{self.size:g}", f"{self.edges[0]:g}", f"{self.edges[-1]:g}"] - - if self.traits.growth: - ret.append("growth=True") - elif self.traits.circular: - ret.append("circular=True") - else: - if not self.traits.underflow: - ret.append("underflow=False") - if not self.traits.overflow: - ret.append("overflow=False") - - if self.transform is not None: - ret.append(f"transform={self.transform}") - - ret += super()._repr_args_() - - return ret - - @property - def transform(self) -> AxisTransform | None: - if hasattr(self._ax, "transform"): - return cast(self, self._ax.transform, AxisTransform) - return None - - -@register( - { - ca.variable_none, - ca.variable_uflow, - ca.variable_oflow, - ca.variable_uoflow, - ca.variable_uoflow_growth, - ca.variable_circular, - } -) -@set_module("boost_histogram.axis") -class Variable(Axis, family=boost_histogram): - __slots__ = () - - def __init__( - self, - edges: Iterable[float], - *, - metadata: Any = None, - underflow: bool = True, - overflow: bool = True, - growth: bool = False, - circular: bool = False, - __dict__: dict[str, Any] | None = None, - ): - """ - Make an axis with irregularly spaced bins. Provide a list - or array of bin edges, and len(edges)-1 bins will be made. - - Parameters - ---------- - edges : Array[float] - The edges for the bins. There will be one less bin than edges. - metadata : object - Any Python object to attach to the axis, like a label. - underflow : bool = True - Enable the underflow bin - overflow : bool = True - Enable the overflow bin - circular : bool = False - Enable wraparound - growth : bool = False - Allow the axis to grow if a value is encountered out of range. - Be careful, the axis will grow as large as needed. - __dict__: Optional[Dict[str, Any]] = None - The full metadata dictionary - """ - - options = _opts( - underflow=underflow, overflow=overflow, growth=growth, circular=circular - ) - - ax: ca._BaseVariable - if options == {"growth", "underflow", "overflow"}: - ax = ca.variable_uoflow_growth(edges) - elif options == {"underflow", "overflow"}: - ax = ca.variable_uoflow(edges) - elif options == {"underflow"}: - ax = ca.variable_uflow(edges) - elif options == {"overflow"}: - ax = ca.variable_oflow(edges) - elif options in ( - {"circular", "underflow", "overflow"}, - {"circular", "overflow"}, - ): - # growth=True, underflow=False is also correct - ax = ca.variable_circular(edges) - elif options == set(): - ax = ca.variable_none(edges) - else: - raise KeyError("Unsupported collection of options") - - super().__init__(ax, metadata, __dict__) - - def _repr_args_(self) -> list[str]: - "Return inner part of signature for use in repr" - - if len(self) > 20: - ret = [repr(self.edges)] - else: - args = ", ".join(format(v, "g") for v in self.edges) - ret = [f"[{args}]"] - - if self.traits.growth: - ret.append("growth=True") - elif self.traits.circular: - ret.append("circular=True") - else: - if not self.traits.underflow: - ret.append("underflow=False") - if not self.traits.overflow: - ret.append("overflow=False") - - ret += super()._repr_args_() - - return ret - - -@register( - { - ca.integer_none, - ca.integer_uflow, - ca.integer_oflow, - ca.integer_uoflow, - ca.integer_growth, - ca.integer_circular, - } -) -@set_module("boost_histogram.axis") -class Integer(Axis, family=boost_histogram): - __slots__ = () - - def __init__( - self, - start: int, - stop: int, - *, - metadata: Any = None, - underflow: bool = True, - overflow: bool = True, - growth: bool = False, - circular: bool = False, - __dict__: dict[str, Any] | None = None, - ): - """ - Make an integer axis, with a collection of consecutive integers. - - Parameters - ---------- - start : int - The beginning value for the axis - stop : int - The ending value for the axis. (start-stop) bins will be created. - metadata : object - Any Python object to attach to the axis, like a label. - underflow : bool = True - Enable the underflow bin - overflow : bool = True - Enable the overflow bin - circular : bool = False - Enable wraparound - growth : bool = False - Allow the axis to grow if a value is encountered out of range. - Be careful, the axis will grow as large as needed. - __dict__: Optional[Dict[str, Any]] = None - The full metadata dictionary - """ - - options = _opts( - underflow=underflow, overflow=overflow, growth=growth, circular=circular - ) - - ax: ca._BaseInteger - - # underflow and overflow settings are ignored, integers are always - # finite and thus cannot end up in a flow bin when growth is on - if "growth" in options and "circular" not in options: - ax = ca.integer_growth(start, stop) - elif options == {"underflow", "overflow"}: - ax = ca.integer_uoflow(start, stop) - elif options == {"underflow"}: - ax = ca.integer_uflow(start, stop) - elif options == {"overflow"}: - ax = ca.integer_oflow(start, stop) - elif "circular" in options and "growth" not in options: - ax = ca.integer_circular(start, stop) - elif options == set(): - ax = ca.integer_none(start, stop) - else: - raise KeyError("Unsupported collection of options") - - super().__init__(ax, metadata, __dict__) - - def _repr_args_(self) -> list[str]: - "Return inner part of signature for use in repr" - - ret = [f"{self.edges[0]:g}", f"{self.edges[-1]:g}"] - - if self.traits.growth: - ret.append("growth=True") - elif self.traits.circular: - ret.append("circular=True") - else: - if not self.traits.underflow: - ret.append("underflow=False") - if not self.traits.overflow: - ret.append("overflow=False") - - ret += super()._repr_args_() - - return ret - - -class BaseCategory(Axis, family=boost_histogram): - __slots__ = () - - def _repr_args_(self) -> list[str]: - "Return inner part of signature for use in repr" - - ret = [] - - if self.traits.growth: - ret.append("growth=True") - elif not self.traits.overflow: - ret.append("overflow=False") - - ret += super()._repr_args_() - return ret - - -@set_module("boost_histogram.axis") -@register({ca.category_str_growth, ca.category_str, ca.category_str_none}) -class StrCategory(BaseCategory, family=boost_histogram): - __slots__ = () - - def __init__( - self, - categories: Iterable[str], - *, - metadata: Any = None, - growth: bool = False, - overflow: bool = True, - __dict__: dict[str, Any] | None = None, - ): - """ - Make a category axis with strings; items will - be added to a predefined list of bins or a growing (with growth=True) - list of bins. - - - Parameters - ---------- - categories : Iterator[str] - The bin values in strings. May be empty if growth is enabled. - metadata : object - Any Python object to attach to the axis, like a label. - growth : bool = False - Allow the axis to grow if a value is encountered out of range. - Be careful, the axis will grow as large as needed. - overflow : bool = True - Include an overflow bin for "missed" hits. Ignored if growth=True. - __dict__: Optional[Dict[str, Any]] = None - The full metadata dictionary - """ - - options = _opts(growth=growth, overflow=overflow) - - ax: ca._BaseCatStr - - # henryiii: We currently expand "abc" to "a", "b", "c" - some - # Python interfaces protect against that - - if "growth" in options: - ax = ca.category_str_growth(tuple(categories)) - elif options == {"overflow"}: - ax = ca.category_str(tuple(categories)) - elif not options: - ax = ca.category_str_none(tuple(categories)) - else: - raise KeyError("Unsupported collection of options") - - super().__init__(ax, metadata, __dict__) - - def index(self, value: float | str) -> int: - """ - Return the fractional index(es) given a value (or values) on the axis. - """ - - if _isstr(value): - return self._ax.index(value) # type: ignore[no-any-return] - - msg = f"index({value}) must be a string or iterable of strings for a StrCategory axis" - raise TypeError(msg) - - def _repr_args_(self) -> list[str]: - "Return inner part of signature for use in repr" - - args = ", ".join(repr(c) for c in self) - ret = [f"[{args}]"] - ret += super()._repr_args_() - return ret - - -@set_module("boost_histogram.axis") -@register({ca.category_int, ca.category_int_growth, ca.category_int_none}) -class IntCategory(BaseCategory, family=boost_histogram): - __slots__ = () - - def __init__( - self, - categories: Iterable[int], - *, - metadata: Any = None, - growth: bool = False, - overflow: bool = True, - __dict__: dict[str, Any] | None = None, - ): - """ - Make a category axis with ints; items will - be added to a predefined list of bins or a growing (with growth=True) - list of bins. An empty list is allowed if growth=True. - - - Parameters - ---------- - categories : Iterable[int] - The bin values, either ints or strings. - metadata : object - Any Python object to attach to the axis, like a label. - growth : bool = False - Allow the axis to grow if a value is encountered out of range. - Be careful, the axis will grow as large as needed. - overflow : bool = True - Include an overflow bin for "missed" hits. Ignored if growth=True. - __dict__: Optional[Dict[str, Any]] = None - The full metadata dictionary - """ - - options = _opts(growth=growth, overflow=overflow) - ax: ca._BaseCatInt - - if "growth" in options: - ax = ca.category_int_growth(tuple(categories)) - elif options == {"overflow"}: - ax = ca.category_int(tuple(categories)) - elif not options: - ax = ca.category_int_none(tuple(categories)) - else: - raise KeyError("Unsupported collection of options") - - super().__init__(ax, metadata, __dict__) - - def _repr_args_(self) -> list[str]: - "Return inner part of signature for use in repr" - - args = ", ".join(format(c, "g") for c in self) - ret = [f"[{args}]"] - ret += super()._repr_args_() - return ret - - -# Contains all common methods and properties for the boolean axis -@register({ca.boolean}) -@set_module("boost_histogram.axis") -class Boolean(Axis, family=boost_histogram): - __slots__ = () - - def __init__(self, *, metadata: Any = None, __dict__: dict[str, Any] | None = None): - """ - Make an axis for boolean values. - - Parameters - ---------- - metadata : object - Any Python object to attach to the axis, like a label. - __dict__: Optional[Dict[str, Any]] = None - The full metadata dictionary - """ - - ax = ca.boolean() - - super().__init__(ax, metadata, __dict__) - - def _repr_args_(self) -> list[str]: - "Return inner part of signature for use in repr" - ret = [] - - if self.size == 0: - ret.append("") - elif self.size == 1 and self.centers[0] < 0.75: - ret.append("") - elif self.size == 1: - ret.append("") - - ret += super()._repr_args_() - return ret diff --git a/src/boost_histogram/_internal/hist.py b/src/boost_histogram/_internal/hist.py index 5a5e56cc..471e02f6 100644 --- a/src/boost_histogram/_internal/hist.py +++ b/src/boost_histogram/_internal/hist.py @@ -27,9 +27,8 @@ import boost_histogram from boost_histogram import _core +from ..axis import AxesTuple, Axis, Variable from ..storage import Double, Storage -from .axestuple import AxesTuple -from .axis import Axis, Variable from .enum import Kind from .typing import Accumulator, ArrayLike, CppHistogram from .utils import cast, register, set_module diff --git a/src/boost_histogram/axis/__init__.py b/src/boost_histogram/axis/__init__.py index 454e8966..d6af3a12 100644 --- a/src/boost_histogram/axis/__init__.py +++ b/src/boost_histogram/axis/__init__.py @@ -1,19 +1,20 @@ from __future__ import annotations -from .._internal.axestuple import ArrayTuple, AxesTuple -from .._internal.axis import ( - Axis, - Boolean, - IntCategory, - Integer, - Regular, - StrCategory, - Variable, -) +import copy +from typing import Any, Callable, Iterable, Iterator, TypeVar, Union + +import numpy as np # pylint: disable=unused-import + +import boost_histogram + +from .._core import axis as ca +from .._internal.axis_transform import AxisTransform from .._internal.traits import Traits +from .._internal.utils import cast, register from . import transform +from ._axes_tuple import ArrayTuple, AxesTuple -__all__ = ( +__all__ = [ "ArrayTuple", "AxesTuple", "Axis", @@ -25,4 +26,746 @@ "Traits", "Variable", "transform", +] + + +def __dir__() -> list[str]: + return __all__ + + +def _isstr(value: Any) -> bool: + """ + Check to see if this is a stringlike or a (nested) iterable of stringlikes + """ + + if isinstance(value, (str, bytes)): + return True + if hasattr(value, "__iter__"): + return all(_isstr(v) for v in value) + return False + + +def _opts(**kwargs: bool) -> set[str]: + return {k for k, v in kwargs.items() if v} + + +AxCallOrInt = Union[int, Callable[["Axis"], int]] + + +T = TypeVar("T", bound="Axis") + + +# Contains common methods and properties to all axes +class Axis: + __slots__ = ("__dict__", "_ax") + _family: object + + def __init_subclass__(cls, *, family: object) -> None: + super().__init_subclass__() + cls._family = family + + def __setattr__(self, attr: str, value: Any) -> None: + if attr == "__dict__": + self._ax.metadata = value + object.__setattr__(self, attr, value) + + def __getattr__(self, attr: str) -> Any: + if attr == "metadata": + return + raise AttributeError( + f"object {self.__class__.__name__} has no attribute {attr}" + ) + + def __init__( + self, + ax: Any, + metadata: dict[str, Any] | None, + __dict__: dict[str, Any] | None, + ) -> None: + """ + ax: the C++ object + metadata: the metadata keyword contents + __dict__: the __dict__ keyword contents + """ + + self._ax = ax + + if __dict__ is not None and metadata is not None: + raise KeyError( + "Cannot provide metadata by keyword and __dict__, use __dict__ only" + ) + if __dict__ is not None: + self._ax.metadata = __dict__ + elif metadata is not None: + self._ax.metadata["metadata"] = metadata + + self.__dict__ = self._ax.metadata + + def __setstate__(self, state: dict[str, Any]) -> None: + self._ax = state["_ax"] + self.__dict__ = self._ax.metadata + + def __getstate__(self) -> dict[str, Any]: + return {"_ax": self._ax} + + def __copy__(self: T) -> T: + other: T = self.__class__.__new__(self.__class__) + other._ax = copy.copy(self._ax) + other.__dict__ = other._ax.metadata + return other + + def index(self, value: float | str) -> int: + """ + Return the fractional index(es) given a value (or values) on the axis. + """ + + if _isstr(value): + msg = f"index({value}) cannot be a string for a numerical axis" + raise TypeError(msg) + + return self._ax.index(value) # type: ignore[no-any-return] + + def value(self, index: float) -> float: + """ + Return the value(s) given an (fractional) index (or indices). + """ + + return self._ax.value(index) # type: ignore[no-any-return] + + def bin(self, index: float) -> int | str | tuple[float, float]: + """ + Return the edges of the bins as a tuple for a + continuous axis or the bin value for a + non-continuous axis, when given an index. + """ + + return self._ax.bin(index) # type: ignore[no-any-return] + + def __eq__(self, other: Any) -> bool: + return hasattr(other, "_ax") and self._ax == other._ax + + def __ne__(self, other: Any) -> bool: + return (not hasattr(other, "_ax")) or self._ax != other._ax + + @classmethod + def _convert_cpp(cls: type[T], cpp_object: Any) -> T: + nice_ax: T = cls.__new__(cls) + nice_ax._ax = cpp_object + nice_ax.__dict__ = cpp_object.metadata + return nice_ax + + def __len__(self) -> int: + return self._ax.size # type: ignore[no-any-return] + + def __iter__( + self, + ) -> Iterator[float] | Iterator[str] | Iterator[tuple[float, float]]: + return self._ax.__iter__() # type: ignore[no-any-return] + + def _process_loc( + self, start: AxCallOrInt | None, stop: AxCallOrInt | None + ) -> tuple[int, int]: + """ + Compute start and stop into actual start and stop values in Boost.Histogram. + None -> -1 or 0 for start, -> len or len+1 for stop. If start or stop are + callable, then call them with the axes. + + For a non-ordered axes, flow is all or nothing, so this will ensure overflow + is turned off if underflow is not None. + """ + + def _process_internal(item: AxCallOrInt | None, default: int) -> int: + return default if item is None else item(self) if callable(item) else item + + underflow = -1 if self._ax.traits_underflow else 0 + overflow = 1 if self._ax.traits_overflow else 0 + + # Non-ordered axes only use flow if integrating from None to None + if not self._ax.traits_ordered and not (start is None and stop is None): + overflow = 0 + + begin = _process_internal(start, underflow) + end = _process_internal(stop, len(self) + overflow) + + return begin, end + + def __repr__(self) -> str: + arg_str = ", ".join(self._repr_args_()) + return f"{self.__class__.__name__}({arg_str})" + + def _repr_args_(self) -> list[str]: + """ + Return arg options for use in the repr as strings. + """ + + ret = [] + if self.metadata is not None: + if isinstance(self.metadata, str): + ret.append(f"metadata={self.metadata!r}") + else: + ret.append("metadata=...") + return ret + + @property + def traits(self) -> Traits: + """ + Get traits for the axis - read only properties of a specific axis. + """ + return Traits( + self._ax.traits_underflow, + self._ax.traits_overflow, + self._ax.traits_circular, + self._ax.traits_growth, + self._ax.traits_continuous, + self._ax.traits_ordered, + ) + + @property + def size(self) -> int: + """ + Return number of bins excluding under- and overflow. + """ + return self._ax.size # type: ignore[no-any-return] + + @property + def extent(self) -> int: + """ + Return number of bins including under- and overflow. + """ + return self._ax.extent # type: ignore[no-any-return] + + def __getitem__(self, i: AxCallOrInt) -> int | str | tuple[float, float]: + """ + Access a bin, using normal Python syntax for wraparound. + """ + # UHI support + if callable(i): + i = i(self) + else: + if i < 0: + i += self._ax.size + if i >= self._ax.size: + raise IndexError( + f"Out of range access, {i} is more than {self._ax.size}" + ) + assert not callable(i) + return self.bin(i) + + @property + def edges(self) -> np.typing.NDArray[Any]: + return self._ax.edges + + @property + def centers(self) -> np.typing.NDArray[Any]: + """ + An array of bin centers. + """ + return self._ax.centers + + @property + def widths(self) -> np.typing.NDArray[Any]: + """ + An array of bin widths. + """ + return self._ax.widths + + +# Contains all common methods and properties for Regular axes +@register( + { + ca.regular_uoflow, + ca.regular_uoflow_growth, + ca.regular_uflow, + ca.regular_oflow, + ca.regular_none, + ca.regular_pow, + ca.regular_trans, + ca.regular_circular, + } +) +class Regular(Axis, family=boost_histogram): + __slots__ = () + + def __init__( + self, + bins: int, + start: float, + stop: float, + *, + metadata: Any = None, + underflow: bool = True, + overflow: bool = True, + growth: bool = False, + circular: bool = False, + transform: AxisTransform | None = None, + __dict__: dict[str, Any] | None = None, + ): + """ + Make a regular axis with nice keyword arguments for underflow, + overflow, and growth. + + Parameters + ---------- + bins : int + The number of bins between start and stop + start : float + The beginning value for the axis + stop : float + The ending value for the axis + metadata : Any + Fills .metadata on the axis. + underflow : bool = True + Enable the underflow bin + overflow : bool = True + Enable the overflow bin + growth : bool = False + Allow the axis to grow if a value is encountered out of range. + Be careful, the axis will grow as large as needed. + circular : bool = False + Filling wraps around. + transform : Optional[AxisTransform] = None + Transform the regular bins (Log, Sqrt, and Pow(v)) + __dict__: Optional[Dict[str, Any]] = None + The full metadata dictionary + """ + + options = _opts( + underflow=underflow, overflow=overflow, growth=growth, circular=circular + ) + + ax: ca._BaseRegular + + if transform is not None: + if options != {"underflow", "overflow"}: + raise KeyError("Transform supplied, cannot change other options") + + if ( + not isinstance(transform, AxisTransform) + and AxisTransform in transform.__bases__ # type: ignore[unreachable] + ): + raise TypeError(f"You must pass an instance, use {transform}()") + + ax = transform._produce(bins, start, stop) + + elif options == {"growth", "underflow", "overflow"}: + ax = ca.regular_uoflow_growth(bins, start, stop) + elif options == {"underflow", "overflow"}: + ax = ca.regular_uoflow(bins, start, stop) + elif options == {"underflow"}: + ax = ca.regular_uflow(bins, start, stop) + elif options == {"overflow"}: + ax = ca.regular_oflow(bins, start, stop) + elif options in ( + {"circular", "underflow", "overflow"}, + {"circular", "overflow"}, + ): + # growth=True, underflow=False is also correct + ax = ca.regular_circular(bins, start, stop) + + elif options == set(): + ax = ca.regular_none(bins, start, stop) + else: + raise KeyError("Unsupported collection of options") + + super().__init__(ax, metadata, __dict__) + + def _repr_args_(self) -> list[str]: + "Return inner part of signature for use in repr" + + ret = [f"{self.size:g}", f"{self.edges[0]:g}", f"{self.edges[-1]:g}"] + + if self.traits.growth: + ret.append("growth=True") + elif self.traits.circular: + ret.append("circular=True") + else: + if not self.traits.underflow: + ret.append("underflow=False") + if not self.traits.overflow: + ret.append("overflow=False") + + if self.transform is not None: + ret.append(f"transform={self.transform}") + + ret += super()._repr_args_() + + return ret + + @property + def transform(self) -> AxisTransform | None: + if hasattr(self._ax, "transform"): + return cast(self, self._ax.transform, AxisTransform) + return None + + +@register( + { + ca.variable_none, + ca.variable_uflow, + ca.variable_oflow, + ca.variable_uoflow, + ca.variable_uoflow_growth, + ca.variable_circular, + } +) +class Variable(Axis, family=boost_histogram): + __slots__ = () + + def __init__( + self, + edges: Iterable[float], + *, + metadata: Any = None, + underflow: bool = True, + overflow: bool = True, + growth: bool = False, + circular: bool = False, + __dict__: dict[str, Any] | None = None, + ): + """ + Make an axis with irregularly spaced bins. Provide a list + or array of bin edges, and len(edges)-1 bins will be made. + + Parameters + ---------- + edges : Array[float] + The edges for the bins. There will be one less bin than edges. + metadata : object + Any Python object to attach to the axis, like a label. + underflow : bool = True + Enable the underflow bin + overflow : bool = True + Enable the overflow bin + circular : bool = False + Enable wraparound + growth : bool = False + Allow the axis to grow if a value is encountered out of range. + Be careful, the axis will grow as large as needed. + __dict__: Optional[Dict[str, Any]] = None + The full metadata dictionary + """ + + options = _opts( + underflow=underflow, overflow=overflow, growth=growth, circular=circular + ) + + ax: ca._BaseVariable + if options == {"growth", "underflow", "overflow"}: + ax = ca.variable_uoflow_growth(edges) + elif options == {"underflow", "overflow"}: + ax = ca.variable_uoflow(edges) + elif options == {"underflow"}: + ax = ca.variable_uflow(edges) + elif options == {"overflow"}: + ax = ca.variable_oflow(edges) + elif options in ( + {"circular", "underflow", "overflow"}, + {"circular", "overflow"}, + ): + # growth=True, underflow=False is also correct + ax = ca.variable_circular(edges) + elif options == set(): + ax = ca.variable_none(edges) + else: + raise KeyError("Unsupported collection of options") + + super().__init__(ax, metadata, __dict__) + + def _repr_args_(self) -> list[str]: + "Return inner part of signature for use in repr" + + if len(self) > 20: + ret = [repr(self.edges)] + else: + args = ", ".join(format(v, "g") for v in self.edges) + ret = [f"[{args}]"] + + if self.traits.growth: + ret.append("growth=True") + elif self.traits.circular: + ret.append("circular=True") + else: + if not self.traits.underflow: + ret.append("underflow=False") + if not self.traits.overflow: + ret.append("overflow=False") + + ret += super()._repr_args_() + + return ret + + +@register( + { + ca.integer_none, + ca.integer_uflow, + ca.integer_oflow, + ca.integer_uoflow, + ca.integer_growth, + ca.integer_circular, + } ) +class Integer(Axis, family=boost_histogram): + __slots__ = () + + def __init__( + self, + start: int, + stop: int, + *, + metadata: Any = None, + underflow: bool = True, + overflow: bool = True, + growth: bool = False, + circular: bool = False, + __dict__: dict[str, Any] | None = None, + ): + """ + Make an integer axis, with a collection of consecutive integers. + + Parameters + ---------- + start : int + The beginning value for the axis + stop : int + The ending value for the axis. (start-stop) bins will be created. + metadata : object + Any Python object to attach to the axis, like a label. + underflow : bool = True + Enable the underflow bin + overflow : bool = True + Enable the overflow bin + circular : bool = False + Enable wraparound + growth : bool = False + Allow the axis to grow if a value is encountered out of range. + Be careful, the axis will grow as large as needed. + __dict__: Optional[Dict[str, Any]] = None + The full metadata dictionary + """ + + options = _opts( + underflow=underflow, overflow=overflow, growth=growth, circular=circular + ) + + ax: ca._BaseInteger + + # underflow and overflow settings are ignored, integers are always + # finite and thus cannot end up in a flow bin when growth is on + if "growth" in options and "circular" not in options: + ax = ca.integer_growth(start, stop) + elif options == {"underflow", "overflow"}: + ax = ca.integer_uoflow(start, stop) + elif options == {"underflow"}: + ax = ca.integer_uflow(start, stop) + elif options == {"overflow"}: + ax = ca.integer_oflow(start, stop) + elif "circular" in options and "growth" not in options: + ax = ca.integer_circular(start, stop) + elif options == set(): + ax = ca.integer_none(start, stop) + else: + raise KeyError("Unsupported collection of options") + + super().__init__(ax, metadata, __dict__) + + def _repr_args_(self) -> list[str]: + "Return inner part of signature for use in repr" + + ret = [f"{self.edges[0]:g}", f"{self.edges[-1]:g}"] + + if self.traits.growth: + ret.append("growth=True") + elif self.traits.circular: + ret.append("circular=True") + else: + if not self.traits.underflow: + ret.append("underflow=False") + if not self.traits.overflow: + ret.append("overflow=False") + + ret += super()._repr_args_() + + return ret + + +class BaseCategory(Axis, family=boost_histogram): + __slots__ = () + + def _repr_args_(self) -> list[str]: + "Return inner part of signature for use in repr" + + ret = [] + + if self.traits.growth: + ret.append("growth=True") + elif not self.traits.overflow: + ret.append("overflow=False") + + ret += super()._repr_args_() + return ret + + +@register({ca.category_str_growth, ca.category_str, ca.category_str_none}) +class StrCategory(BaseCategory, family=boost_histogram): + __slots__ = () + + def __init__( + self, + categories: Iterable[str], + *, + metadata: Any = None, + growth: bool = False, + overflow: bool = True, + __dict__: dict[str, Any] | None = None, + ): + """ + Make a category axis with strings; items will + be added to a predefined list of bins or a growing (with growth=True) + list of bins. + + + Parameters + ---------- + categories : Iterator[str] + The bin values in strings. May be empty if growth is enabled. + metadata : object + Any Python object to attach to the axis, like a label. + growth : bool = False + Allow the axis to grow if a value is encountered out of range. + Be careful, the axis will grow as large as needed. + overflow : bool = True + Include an overflow bin for "missed" hits. Ignored if growth=True. + __dict__: Optional[Dict[str, Any]] = None + The full metadata dictionary + """ + + options = _opts(growth=growth, overflow=overflow) + + ax: ca._BaseCatStr + + # henryiii: We currently expand "abc" to "a", "b", "c" - some + # Python interfaces protect against that + + if "growth" in options: + ax = ca.category_str_growth(tuple(categories)) + elif options == {"overflow"}: + ax = ca.category_str(tuple(categories)) + elif not options: + ax = ca.category_str_none(tuple(categories)) + else: + raise KeyError("Unsupported collection of options") + + super().__init__(ax, metadata, __dict__) + + def index(self, value: float | str) -> int: + """ + Return the fractional index(es) given a value (or values) on the axis. + """ + + if _isstr(value): + return self._ax.index(value) # type: ignore[no-any-return] + + msg = f"index({value}) must be a string or iterable of strings for a StrCategory axis" + raise TypeError(msg) + + def _repr_args_(self) -> list[str]: + "Return inner part of signature for use in repr" + + args = ", ".join(repr(c) for c in self) + ret = [f"[{args}]"] + ret += super()._repr_args_() + return ret + + +@register({ca.category_int, ca.category_int_growth, ca.category_int_none}) +class IntCategory(BaseCategory, family=boost_histogram): + __slots__ = () + + def __init__( + self, + categories: Iterable[int], + *, + metadata: Any = None, + growth: bool = False, + overflow: bool = True, + __dict__: dict[str, Any] | None = None, + ): + """ + Make a category axis with ints; items will + be added to a predefined list of bins or a growing (with growth=True) + list of bins. An empty list is allowed if growth=True. + + + Parameters + ---------- + categories : Iterable[int] + The bin values, either ints or strings. + metadata : object + Any Python object to attach to the axis, like a label. + growth : bool = False + Allow the axis to grow if a value is encountered out of range. + Be careful, the axis will grow as large as needed. + overflow : bool = True + Include an overflow bin for "missed" hits. Ignored if growth=True. + __dict__: Optional[Dict[str, Any]] = None + The full metadata dictionary + """ + + options = _opts(growth=growth, overflow=overflow) + ax: ca._BaseCatInt + + if "growth" in options: + ax = ca.category_int_growth(tuple(categories)) + elif options == {"overflow"}: + ax = ca.category_int(tuple(categories)) + elif not options: + ax = ca.category_int_none(tuple(categories)) + else: + raise KeyError("Unsupported collection of options") + + super().__init__(ax, metadata, __dict__) + + def _repr_args_(self) -> list[str]: + "Return inner part of signature for use in repr" + + args = ", ".join(format(c, "g") for c in self) + ret = [f"[{args}]"] + ret += super()._repr_args_() + return ret + + +# Contains all common methods and properties for the boolean axis +@register({ca.boolean}) +class Boolean(Axis, family=boost_histogram): + __slots__ = () + + def __init__(self, *, metadata: Any = None, __dict__: dict[str, Any] | None = None): + """ + Make an axis for boolean values. + + Parameters + ---------- + metadata : object + Any Python object to attach to the axis, like a label. + __dict__: Optional[Dict[str, Any]] = None + The full metadata dictionary + """ + + ax = ca.boolean() + + super().__init__(ax, metadata, __dict__) + + def _repr_args_(self) -> list[str]: + "Return inner part of signature for use in repr" + ret = [] + + if self.size == 0: + ret.append("") + elif self.size == 1 and self.centers[0] < 0.75: + ret.append("") + elif self.size == 1: + ret.append("") + + ret += super()._repr_args_() + return ret diff --git a/src/boost_histogram/_internal/axestuple.py b/src/boost_histogram/axis/_axes_tuple.py similarity index 96% rename from src/boost_histogram/_internal/axestuple.py rename to src/boost_histogram/axis/_axes_tuple.py index 83bcf0ba..21d96881 100644 --- a/src/boost_histogram/_internal/axestuple.py +++ b/src/boost_histogram/axis/_axes_tuple.py @@ -5,8 +5,8 @@ import numpy as np -from .axis import Axis -from .utils import set_module, zip_strict +from .._internal.utils import zip_strict +from . import Axis A = TypeVar("A", bound="ArrayTuple") @@ -16,7 +16,6 @@ class MGridOpts(TypedDict): indexing: Literal["ij", "xy"] -@set_module("boost_histogram.axis") class ArrayTuple(tuple): # type: ignore[type-arg] __slots__ = () # This is an exhaustive list as of NumPy 1.19 @@ -47,7 +46,6 @@ def broadcast(self: A) -> A: B = TypeVar("B", bound="AxesTuple") -@set_module("boost_histogram.axis") class AxesTuple(tuple): # type: ignore[type-arg] __slots__ = () _MGRIDOPTS: ClassVar[MGridOpts] = {"sparse": True, "indexing": "ij"} From 44d8ca9cdba55f5eb73cbf351148b71d50406638 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Wed, 29 Jan 2025 15:23:44 -0500 Subject: [PATCH 03/16] chore: move _internal.axis_transform to axis.transform Signed-off-by: Henry Schreiner --- .../_internal/axis_transform.py | 150 ---------------- src/boost_histogram/axis/__init__.py | 2 +- src/boost_histogram/axis/transform.py | 160 +++++++++++++++++- 3 files changed, 153 insertions(+), 159 deletions(-) delete mode 100644 src/boost_histogram/_internal/axis_transform.py diff --git a/src/boost_histogram/_internal/axis_transform.py b/src/boost_histogram/_internal/axis_transform.py deleted file mode 100644 index f038e556..00000000 --- a/src/boost_histogram/_internal/axis_transform.py +++ /dev/null @@ -1,150 +0,0 @@ -from __future__ import annotations - -import copy -from typing import Any, ClassVar, TypeVar - -import boost_histogram - -from .._core import axis as ca -from .utils import register, set_module - -T = TypeVar("T", bound="AxisTransform") - - -@set_module("boost_histogram.axis.transform") -class AxisTransform: - __slots__ = ("_this",) - _family: object - _this: ca.transform._BaseTransform - - def __init_subclass__(cls, *, family: object) -> None: - super().__init_subclass__() - cls._family = family - - def __copy__(self: T) -> T: - other: T = self.__class__.__new__(self.__class__) - other._this = copy.copy(self._this) - return other - - @classmethod - def _convert_cpp(cls: type[T], this: Any) -> T: - self: T = cls.__new__(cls) - self._this = this - return self - - def __repr__(self) -> str: - if hasattr(self, "_this"): - return repr(self._this) - - return f"{self.__class__.__name__}() # Missing _this, broken class" - - def _produce(self, bins: int, start: float, stop: float) -> Any: - raise NotImplementedError() - - def __init__(self) -> None: - "Create a new transform instance" - raise NotImplementedError() - - def forward(self, value: float) -> float: - "Compute the forward transform" - return self._this.forward(value) - - def inverse(self, value: float) -> float: - "Compute the inverse transform" - return self._this.inverse(value) - - -@set_module("boost_histogram.axis.transform") -@register({ca.transform.pow}) -class Pow(AxisTransform, family=boost_histogram): - __slots__ = () - _type = ca.regular_pow - _this: ca.transform.pow - - # Note: this comes from family - _types: ClassVar[set[type[ca.transform.pow]]] - - def __init__(self, power: float): # pylint: disable=super-init-not-called - "Create a new transform instance" - (cpp_class,) = self._types - self._this = cpp_class(power) - - @property - def power(self) -> float: - "The power of the transform" - return self._this.power - - # This one does need to be a normal method - def _produce(self, bins: int, start: float, stop: float) -> Any: - return self.__class__._type(bins, start, stop, self.power) - - -@set_module("boost_histogram.axis.transform") -@register({ca.transform.func_transform}) -class Function(AxisTransform, family=boost_histogram): - __slots__ = () - _type = ca.regular_trans - _this: ca.transform.func_transform - - # Note: this comes from family - _types: ClassVar[set[type[ca.transform.func_transform]]] - - def __init__( # pylint: disable=super-init-not-called - self, forward: Any, inverse: Any, *, convert: Any = None, name: str = "" - ): - """ - Create a functional transform from a ctypes double(double) function - pointer or any object that provides such an interface through a - ``.ctypes`` attribute (such as numba.cfunc). A pure python function *can* - be adapted to a ctypes pointer, but please use a Variable axis instead or - use something like numba to produce a compiled function pointer. You can - manually specify the repr name with ``name=``. - - Example of Numba use: - --------------------- - - @numba.cfunc(numba.float64(numba.float64,)) - def exp(x): - return math.exp(x) - - @numba.cfunc(numba.float64(numba.float64,)) - def log(x): - return math.log(x) - - Example of slow CTypes use: - --------------------------- - - ftype = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_double) - log = ftype(math.log) - exp = ftype(math.exp) - - - Now you can supply these functions, and you will get a high performance - transformation axis. - - You can also supply an optional conversion function; this will take the input - forward and inverse and call them before producing a transform. This enables - pickling, as well, since ctypes pointers are not picklable. A few common - utilities have been supplied: - - * ``convert.numba``: Compile using numba (required) - * ``convert.python``: Just call the Python function (15-90x slower than compiled) - - See also - -------- - - * ``Numbify(forward, inverse, *, name='')``: Uses convert=convert.numba - * ``PythonFunction(forward, inverse, *, name='')``: Uses convert=convert.python - - """ - - (cpp_class,) = self._types - self._this = cpp_class(forward, inverse, convert, name) - - # This one does need to be a normal method - def _produce(self, bins: int, start: float, stop: float) -> Any: - return self.__class__._type(bins, start, stop, self._this) - - -def _internal_conversion(name: str) -> Any: - return getattr(ca.transform, name) diff --git a/src/boost_histogram/axis/__init__.py b/src/boost_histogram/axis/__init__.py index d6af3a12..0e33f830 100644 --- a/src/boost_histogram/axis/__init__.py +++ b/src/boost_histogram/axis/__init__.py @@ -8,11 +8,11 @@ import boost_histogram from .._core import axis as ca -from .._internal.axis_transform import AxisTransform from .._internal.traits import Traits from .._internal.utils import cast, register from . import transform from ._axes_tuple import ArrayTuple, AxesTuple +from .transform import AxisTransform __all__ = [ "ArrayTuple", diff --git a/src/boost_histogram/axis/transform.py b/src/boost_histogram/axis/transform.py index 733d268b..10afaa88 100644 --- a/src/boost_histogram/axis/transform.py +++ b/src/boost_histogram/axis/transform.py @@ -1,13 +1,157 @@ from __future__ import annotations -from .._internal.axis_transform import ( - AxisTransform, - Function, - Pow, - _internal_conversion, -) - -__all__ = ("AxisTransform", "Function", "Pow", "log", "sqrt") +import copy +from typing import Any, ClassVar, TypeVar + +import boost_histogram + +from .._core import axis as ca +from .._internal.utils import register + +T = TypeVar("T", bound="AxisTransform") + +__all__ = ["AxisTransform", "Function", "Pow", "log", "sqrt"] + + +def __dir__() -> list[str]: + return __all__ + + +class AxisTransform: + __slots__ = ("_this",) + _family: object + _this: ca.transform._BaseTransform + + def __init_subclass__(cls, *, family: object) -> None: + super().__init_subclass__() + cls._family = family + + def __copy__(self: T) -> T: + other: T = self.__class__.__new__(self.__class__) + other._this = copy.copy(self._this) + return other + + @classmethod + def _convert_cpp(cls: type[T], this: Any) -> T: + self: T = cls.__new__(cls) + self._this = this + return self + + def __repr__(self) -> str: + if hasattr(self, "_this"): + return repr(self._this) + + return f"{self.__class__.__name__}() # Missing _this, broken class" + + def _produce(self, bins: int, start: float, stop: float) -> Any: + raise NotImplementedError() + + def __init__(self) -> None: + "Create a new transform instance" + raise NotImplementedError() + + def forward(self, value: float) -> float: + "Compute the forward transform" + return self._this.forward(value) + + def inverse(self, value: float) -> float: + "Compute the inverse transform" + return self._this.inverse(value) + + +@register({ca.transform.pow}) +class Pow(AxisTransform, family=boost_histogram): + __slots__ = () + _type = ca.regular_pow + _this: ca.transform.pow + + # Note: this comes from family + _types: ClassVar[set[type[ca.transform.pow]]] + + def __init__(self, power: float): # pylint: disable=super-init-not-called + "Create a new transform instance" + (cpp_class,) = self._types + self._this = cpp_class(power) + + @property + def power(self) -> float: + "The power of the transform" + return self._this.power + + # This one does need to be a normal method + def _produce(self, bins: int, start: float, stop: float) -> Any: + return self.__class__._type(bins, start, stop, self.power) + + +@register({ca.transform.func_transform}) +class Function(AxisTransform, family=boost_histogram): + __slots__ = () + _type = ca.regular_trans + _this: ca.transform.func_transform + + # Note: this comes from family + _types: ClassVar[set[type[ca.transform.func_transform]]] + + def __init__( # pylint: disable=super-init-not-called + self, forward: Any, inverse: Any, *, convert: Any = None, name: str = "" + ): + """ + Create a functional transform from a ctypes double(double) function + pointer or any object that provides such an interface through a + ``.ctypes`` attribute (such as numba.cfunc). A pure python function *can* + be adapted to a ctypes pointer, but please use a Variable axis instead or + use something like numba to produce a compiled function pointer. You can + manually specify the repr name with ``name=``. + + Example of Numba use: + --------------------- + + @numba.cfunc(numba.float64(numba.float64,)) + def exp(x): + return math.exp(x) + + @numba.cfunc(numba.float64(numba.float64,)) + def log(x): + return math.log(x) + + Example of slow CTypes use: + --------------------------- + + ftype = ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_double) + log = ftype(math.log) + exp = ftype(math.exp) + + + Now you can supply these functions, and you will get a high performance + transformation axis. + + You can also supply an optional conversion function; this will take the input + forward and inverse and call them before producing a transform. This enables + pickling, as well, since ctypes pointers are not picklable. A few common + utilities have been supplied: + + * ``convert.numba``: Compile using numba (required) + * ``convert.python``: Just call the Python function (15-90x slower than compiled) + + See also + -------- + + * ``Numbify(forward, inverse, *, name='')``: Uses convert=convert.numba + * ``PythonFunction(forward, inverse, *, name='')``: Uses convert=convert.python + + """ + + (cpp_class,) = self._types + self._this = cpp_class(forward, inverse, convert, name) + + # This one does need to be a normal method + def _produce(self, bins: int, start: float, stop: float) -> Any: + return self.__class__._type(bins, start, stop, self._this) + + +def _internal_conversion(name: str) -> Any: + return getattr(ca.transform, name) + sqrt = Function("_sqrt_fn", "_sq_fn", convert=_internal_conversion, name="sqrt") log = Function("_log_fn", "_exp_fn", convert=_internal_conversion, name="log") From 87c0d3aceb2715ff473c1eba156c87084f1f90f0 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Wed, 29 Jan 2025 16:03:09 -0500 Subject: [PATCH 04/16] chore: combine axestuple Signed-off-by: Henry Schreiner --- noxfile.py | 2 +- pyproject.toml | 1 + src/boost_histogram/axis/__init__.py | 131 +++++++++++++++++++++++- src/boost_histogram/axis/_axes_tuple.py | 121 ---------------------- src/boost_histogram/axis/transform.py | 2 +- tests/test_histogram_indexing.py | 4 +- 6 files changed, 132 insertions(+), 129 deletions(-) delete mode 100644 src/boost_histogram/axis/_axes_tuple.py diff --git a/noxfile.py b/noxfile.py index e23f8eb2..8d182091 100644 --- a/noxfile.py +++ b/noxfile.py @@ -128,7 +128,7 @@ def pylint(session: nox.Session) -> None: Run pylint. """ - session.install("pylint==3.2.*") + session.install("pylint==3.3.*") session.install("-e.") session.run("pylint", "boost_histogram", *session.posargs) diff --git a/pyproject.toml b/pyproject.toml index 07684862..fd1a70d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -208,6 +208,7 @@ messages_control.disable = [ "too-many-locals", "too-many-return-statements", "too-many-statements", + "too-many-positional-arguments", "wrong-import-position", ] diff --git a/src/boost_histogram/axis/__init__.py b/src/boost_histogram/axis/__init__.py index 0e33f830..1fe3a915 100644 --- a/src/boost_histogram/axis/__init__.py +++ b/src/boost_histogram/axis/__init__.py @@ -1,7 +1,18 @@ from __future__ import annotations import copy -from typing import Any, Callable, Iterable, Iterator, TypeVar, Union +from functools import partial +from typing import ( + Any, + Callable, + ClassVar, + Iterable, + Iterator, + Literal, + TypedDict, + TypeVar, + Union, +) import numpy as np # pylint: disable=unused-import @@ -9,9 +20,8 @@ from .._core import axis as ca from .._internal.traits import Traits -from .._internal.utils import cast, register +from .._internal.utils import cast, register, zip_strict from . import transform -from ._axes_tuple import ArrayTuple, AxesTuple from .transform import AxisTransform __all__ = [ @@ -297,7 +307,7 @@ def __init__( overflow: bool = True, growth: bool = False, circular: bool = False, - transform: AxisTransform | None = None, + transform: AxisTransform | None = None, # pylint: disable=redefined-outer-name __dict__: dict[str, Any] | None = None, ): """ @@ -769,3 +779,116 @@ def _repr_args_(self) -> list[str]: ret += super()._repr_args_() return ret + + +class MGridOpts(TypedDict): + sparse: bool + indexing: Literal["ij", "xy"] + + +A = TypeVar("A", bound="ArrayTuple") + + +class ArrayTuple(tuple): # type: ignore[type-arg] + __slots__ = () + # This is an exhaustive list as of NumPy 1.19 + _REDUCTIONS = frozenset(("sum", "any", "all", "min", "max", "prod")) + + def __getattr__(self, name: str) -> Any: + if name in self._REDUCTIONS: + return partial(getattr(np, name), np.broadcast_arrays(*self)) + + return self.__class__(getattr(a, name) for a in self) + + def __dir__(self) -> list[str]: + names = dir(self.__class__) + dir("np.typing.NDArray[Any]") + return sorted(n for n in names if not n.startswith("_")) + + def __call__(self, *args: Any, **kwargs: Any) -> Any: + return self.__class__(a(*args, **kwargs) for a in self) + + def broadcast(self: A) -> A: + """ + The arrays in this tuple will be compressed if possible to save memory. + Use this method to broadcast them out into their full memory + representation. + """ + return self.__class__(np.broadcast_arrays(*self)) + + +B = TypeVar("B", bound="AxesTuple") + + +class AxesTuple(tuple): # type: ignore[type-arg] + __slots__ = () + _MGRIDOPTS: ClassVar[MGridOpts] = {"sparse": True, "indexing": "ij"} + + def __init__(self, __iterable: Iterable[Axis]) -> None: + for item in self: + if not isinstance(item, Axis): + raise TypeError( + f"Only an iterable of Axis supported in AxesTuple, got {item}" + ) + super().__init__() + + @property + def size(self) -> tuple[int, ...]: + return tuple(s.size for s in self) + + @property + def extent(self) -> tuple[int, ...]: + return tuple(s.extent for s in self) + + @property + def centers(self) -> ArrayTuple: + gen = (s.centers for s in self) + return ArrayTuple(np.meshgrid(*gen, **self._MGRIDOPTS)) + + @property + def edges(self) -> ArrayTuple: + gen = (s.edges for s in self) + return ArrayTuple(np.meshgrid(*gen, **self._MGRIDOPTS)) + + @property + def widths(self) -> ArrayTuple: + gen = (s.widths for s in self) + return ArrayTuple(np.meshgrid(*gen, **self._MGRIDOPTS)) + + def value(self, *indexes: float) -> tuple[float, ...]: + if len(indexes) != len(self): + raise IndexError( + "Must have the same number of arguments as the number of axes" + ) + return tuple(self[i].value(indexes[i]) for i in range(len(indexes))) + + def bin(self, *indexes: float) -> tuple[float, ...]: + if len(indexes) != len(self): + raise IndexError( + "Must have the same number of arguments as the number of axes" + ) + return tuple(self[i].bin(indexes[i]) for i in range(len(indexes))) + + def index(self, *values: float) -> tuple[float, ...]: # type: ignore[override, override] + if len(values) != len(self): + raise IndexError( + "Must have the same number of arguments as the number of axes" + ) + return tuple(self[i].index(values[i]) for i in range(len(values))) + + def __getitem__(self, item: Any) -> Any: + result = super().__getitem__(item) + return self.__class__(result) if isinstance(result, tuple) else result + + def __getattr__(self, attr: str) -> tuple[Any, ...]: + return tuple(getattr(s, attr) for s in self) + + def __setattr__(self, attr: str, values: Any) -> None: + try: + super().__setattr__(attr, values) + except AttributeError: + for s, v in zip_strict(self, values): + s.__setattr__(attr, v) + + value.__doc__ = Axis.value.__doc__ + index.__doc__ = Axis.index.__doc__ + bin.__doc__ = Axis.bin.__doc__ diff --git a/src/boost_histogram/axis/_axes_tuple.py b/src/boost_histogram/axis/_axes_tuple.py deleted file mode 100644 index 21d96881..00000000 --- a/src/boost_histogram/axis/_axes_tuple.py +++ /dev/null @@ -1,121 +0,0 @@ -from __future__ import annotations - -from functools import partial -from typing import Any, ClassVar, Iterable, Literal, TypedDict, TypeVar - -import numpy as np - -from .._internal.utils import zip_strict -from . import Axis - -A = TypeVar("A", bound="ArrayTuple") - - -class MGridOpts(TypedDict): - sparse: bool - indexing: Literal["ij", "xy"] - - -class ArrayTuple(tuple): # type: ignore[type-arg] - __slots__ = () - # This is an exhaustive list as of NumPy 1.19 - _REDUCTIONS = frozenset(("sum", "any", "all", "min", "max", "prod")) - - def __getattr__(self, name: str) -> Any: - if name in self._REDUCTIONS: - return partial(getattr(np, name), np.broadcast_arrays(*self)) - - return self.__class__(getattr(a, name) for a in self) - - def __dir__(self) -> list[str]: - names = dir(self.__class__) + dir("np.typing.NDArray[Any]") - return sorted(n for n in names if not n.startswith("_")) - - def __call__(self, *args: Any, **kwargs: Any) -> Any: - return self.__class__(a(*args, **kwargs) for a in self) - - def broadcast(self: A) -> A: - """ - The arrays in this tuple will be compressed if possible to save memory. - Use this method to broadcast them out into their full memory - representation. - """ - return self.__class__(np.broadcast_arrays(*self)) - - -B = TypeVar("B", bound="AxesTuple") - - -class AxesTuple(tuple): # type: ignore[type-arg] - __slots__ = () - _MGRIDOPTS: ClassVar[MGridOpts] = {"sparse": True, "indexing": "ij"} - - def __init__(self, __iterable: Iterable[Axis]) -> None: - for item in self: - if not isinstance(item, Axis): - raise TypeError( - f"Only an iterable of Axis supported in AxesTuple, got {item}" - ) - super().__init__() - - @property - def size(self) -> tuple[int, ...]: - return tuple(s.size for s in self) - - @property - def extent(self) -> tuple[int, ...]: - return tuple(s.extent for s in self) - - @property - def centers(self) -> ArrayTuple: - gen = (s.centers for s in self) - return ArrayTuple(np.meshgrid(*gen, **self._MGRIDOPTS)) - - @property - def edges(self) -> ArrayTuple: - gen = (s.edges for s in self) - return ArrayTuple(np.meshgrid(*gen, **self._MGRIDOPTS)) - - @property - def widths(self) -> ArrayTuple: - gen = (s.widths for s in self) - return ArrayTuple(np.meshgrid(*gen, **self._MGRIDOPTS)) - - def value(self, *indexes: float) -> tuple[float, ...]: - if len(indexes) != len(self): - raise IndexError( - "Must have the same number of arguments as the number of axes" - ) - return tuple(self[i].value(indexes[i]) for i in range(len(indexes))) - - def bin(self, *indexes: float) -> tuple[float, ...]: - if len(indexes) != len(self): - raise IndexError( - "Must have the same number of arguments as the number of axes" - ) - return tuple(self[i].bin(indexes[i]) for i in range(len(indexes))) - - def index(self, *values: float) -> tuple[float, ...]: # type: ignore[override, override] - if len(values) != len(self): - raise IndexError( - "Must have the same number of arguments as the number of axes" - ) - return tuple(self[i].index(values[i]) for i in range(len(values))) - - def __getitem__(self, item: Any) -> Any: - result = super().__getitem__(item) - return self.__class__(result) if isinstance(result, tuple) else result - - def __getattr__(self, attr: str) -> tuple[Any, ...]: - return tuple(getattr(s, attr) for s in self) - - def __setattr__(self, attr: str, values: Any) -> None: - try: - super().__setattr__(attr, values) - except AttributeError: - for s, v in zip_strict(self, values): - s.__setattr__(attr, v) - - value.__doc__ = Axis.value.__doc__ - index.__doc__ = Axis.index.__doc__ - bin.__doc__ = Axis.bin.__doc__ diff --git a/src/boost_histogram/axis/transform.py b/src/boost_histogram/axis/transform.py index 10afaa88..ed7fa9ea 100644 --- a/src/boost_histogram/axis/transform.py +++ b/src/boost_histogram/axis/transform.py @@ -19,7 +19,7 @@ def __dir__() -> list[str]: class AxisTransform: __slots__ = ("_this",) - _family: object + _family: ClassVar[object] # pylint: disable=declare-non-slot _this: ca.transform._BaseTransform def __init_subclass__(cls, *, family: object) -> None: diff --git a/tests/test_histogram_indexing.py b/tests/test_histogram_indexing.py index 7dd94ea8..0c4f54e5 100644 --- a/tests/test_histogram_indexing.py +++ b/tests/test_histogram_indexing.py @@ -377,7 +377,7 @@ def test_pick_flowbin(ax): def test_axes_tuple(): h = bh.Histogram(bh.axis.Regular(10, 0, 1)) - assert isinstance(h.axes[:1], bh._internal.axestuple.AxesTuple) + assert isinstance(h.axes[:1], bh.axis.AxesTuple) assert isinstance(h.axes[0], bh.axis.Regular) (before,) = h.axes.centers[:1] @@ -390,7 +390,7 @@ def test_axes_tuple_Nd(): h = bh.Histogram( bh.axis.Integer(0, 5), bh.axis.Integer(0, 4), bh.axis.Integer(0, 6) ) - assert isinstance(h.axes[:2], bh._internal.axestuple.AxesTuple) + assert isinstance(h.axes[:2], bh.axis.AxesTuple) assert isinstance(h.axes[1], bh.axis.Integer) b1, b2 = h.axes.centers[1:3] From 2a19a154dfc62b010b7eff8833ae9cd34380748d Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Wed, 29 Jan 2025 16:26:05 -0500 Subject: [PATCH 05/16] chore: move traits into axes Signed-off-by: Henry Schreiner --- src/boost_histogram/_internal/traits.py | 18 ------------------ src/boost_histogram/axis/__init__.py | 17 ++++++++++++++++- 2 files changed, 16 insertions(+), 19 deletions(-) delete mode 100644 src/boost_histogram/_internal/traits.py diff --git a/src/boost_histogram/_internal/traits.py b/src/boost_histogram/_internal/traits.py deleted file mode 100644 index 0d377e4c..00000000 --- a/src/boost_histogram/_internal/traits.py +++ /dev/null @@ -1,18 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass - - -@dataclass(order=True, frozen=True) -class Traits: - underflow: bool = False - overflow: bool = False - circular: bool = False - growth: bool = False - continuous: bool = False - ordered: bool = False - - @property - def discrete(self) -> bool: - "True if axis is not continuous" - return not self.continuous diff --git a/src/boost_histogram/axis/__init__.py b/src/boost_histogram/axis/__init__.py index 1fe3a915..705bbde0 100644 --- a/src/boost_histogram/axis/__init__.py +++ b/src/boost_histogram/axis/__init__.py @@ -1,6 +1,7 @@ from __future__ import annotations import copy +from dataclasses import dataclass from functools import partial from typing import ( Any, @@ -19,7 +20,6 @@ import boost_histogram from .._core import axis as ca -from .._internal.traits import Traits from .._internal.utils import cast, register, zip_strict from . import transform from .transform import AxisTransform @@ -62,6 +62,21 @@ def _opts(**kwargs: bool) -> set[str]: AxCallOrInt = Union[int, Callable[["Axis"], int]] +@dataclass(order=True, frozen=True) +class Traits: + underflow: bool = False + overflow: bool = False + circular: bool = False + growth: bool = False + continuous: bool = False + ordered: bool = False + + @property + def discrete(self) -> bool: + "True if axis is not continuous" + return not self.continuous + + T = TypeVar("T", bound="Axis") From c959bbf86feb42ad759ae211844bfc237e0f3fbf Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Wed, 29 Jan 2025 16:29:06 -0500 Subject: [PATCH 06/16] chore: drop unused code Signed-off-by: Henry Schreiner --- src/boost_histogram/_internal/deprecated.py | 29 --------------------- 1 file changed, 29 deletions(-) delete mode 100644 src/boost_histogram/_internal/deprecated.py diff --git a/src/boost_histogram/_internal/deprecated.py b/src/boost_histogram/_internal/deprecated.py deleted file mode 100644 index 72463cef..00000000 --- a/src/boost_histogram/_internal/deprecated.py +++ /dev/null @@ -1,29 +0,0 @@ -from __future__ import annotations - -import functools -import warnings -from typing import Any - -# Warning: this should not be directly used on properties. It will trigger on -# tab completion - ALL tab completion that could include this property. -# ob. will produce a warning, for example. Instead use a hidden method and -# a __getattr__ if the property was not settable. - - -class deprecated: - def __init__(self, reason: str, name: str = "") -> None: - self._reason = reason - self._name = name - - def __call__(self, func: Any) -> Any: - @functools.wraps(func) - def decorated_func(*args: Any, **kwargs: Any) -> Any: - warnings.warn( - f"{self._name or func.__name__} is deprecated: {self._reason}", - category=FutureWarning, - stacklevel=2, - ) - return func(*args, **kwargs) - - decorated_func.__doc__ = f"DEPRECATED: {self._reason}\n{func.__doc__}" - return decorated_func From eef5efd9a0c33f763cab70190ad15111e4815824 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Wed, 29 Jan 2025 16:32:14 -0500 Subject: [PATCH 07/16] chore: move typing out of _internal Signed-off-by: Henry Schreiner --- src/boost_histogram/_internal/hist.py | 2 +- src/boost_histogram/_internal/view.py | 2 +- src/boost_histogram/accumulators.py | 2 +- src/boost_histogram/tag.py | 2 +- src/boost_histogram/{_internal => }/typing.py | 0 5 files changed, 4 insertions(+), 4 deletions(-) rename src/boost_histogram/{_internal => }/typing.py (100%) diff --git a/src/boost_histogram/_internal/hist.py b/src/boost_histogram/_internal/hist.py index 471e02f6..f66d94b1 100644 --- a/src/boost_histogram/_internal/hist.py +++ b/src/boost_histogram/_internal/hist.py @@ -29,8 +29,8 @@ from ..axis import AxesTuple, Axis, Variable from ..storage import Double, Storage +from ..typing import Accumulator, ArrayLike, CppHistogram from .enum import Kind -from .typing import Accumulator, ArrayLike, CppHistogram from .utils import cast, register, set_module from .view import MeanView, WeightedMeanView, WeightedSumView, _to_view diff --git a/src/boost_histogram/_internal/view.py b/src/boost_histogram/_internal/view.py index dff89cfc..7baaecd1 100644 --- a/src/boost_histogram/_internal/view.py +++ b/src/boost_histogram/_internal/view.py @@ -5,7 +5,7 @@ import numpy as np from ..accumulators import Mean, WeightedMean, WeightedSum -from .typing import ArrayLike, StrIndex, Ufunc +from ..typing import ArrayLike, StrIndex, Ufunc UFMethod = Literal["__call__", "reduce", "reduceat", "accumulate", "outer", "at"] diff --git a/src/boost_histogram/accumulators.py b/src/boost_histogram/accumulators.py index c3294587..aca9acab 100644 --- a/src/boost_histogram/accumulators.py +++ b/src/boost_histogram/accumulators.py @@ -6,7 +6,7 @@ WeightedMean, WeightedSum, ) -from ._internal.typing import Accumulator +from .typing import Accumulator __all__ = ("Accumulator", "Mean", "Sum", "WeightedMean", "WeightedSum") diff --git a/src/boost_histogram/tag.py b/src/boost_histogram/tag.py index 5b54c990..dcedf7b9 100644 --- a/src/boost_histogram/tag.py +++ b/src/boost_histogram/tag.py @@ -9,7 +9,7 @@ if TYPE_CHECKING: from uhi.typing.plottable import PlottableAxis -from ._internal.typing import AxisLike +from .typing import AxisLike __all__ = ("Locator", "Slicer", "at", "loc", "overflow", "rebin", "sum", "underflow") diff --git a/src/boost_histogram/_internal/typing.py b/src/boost_histogram/typing.py similarity index 100% rename from src/boost_histogram/_internal/typing.py rename to src/boost_histogram/typing.py From 82b7068b1f7b52a335fb9ca7b07eb1273f67efe9 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Wed, 29 Jan 2025 16:35:30 -0500 Subject: [PATCH 08/16] chore: move view out of _internal Signed-off-by: Henry Schreiner --- src/boost_histogram/_internal/hist.py | 2 +- src/boost_histogram/{_internal => }/view.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename src/boost_histogram/{_internal => }/view.py (98%) diff --git a/src/boost_histogram/_internal/hist.py b/src/boost_histogram/_internal/hist.py index f66d94b1..18fd11a2 100644 --- a/src/boost_histogram/_internal/hist.py +++ b/src/boost_histogram/_internal/hist.py @@ -30,9 +30,9 @@ from ..axis import AxesTuple, Axis, Variable from ..storage import Double, Storage from ..typing import Accumulator, ArrayLike, CppHistogram +from ..view import MeanView, WeightedMeanView, WeightedSumView, _to_view from .enum import Kind from .utils import cast, register, set_module -from .view import MeanView, WeightedMeanView, WeightedSumView, _to_view if TYPE_CHECKING: from builtins import ellipsis diff --git a/src/boost_histogram/_internal/view.py b/src/boost_histogram/view.py similarity index 98% rename from src/boost_histogram/_internal/view.py rename to src/boost_histogram/view.py index 7baaecd1..c4a8556b 100644 --- a/src/boost_histogram/_internal/view.py +++ b/src/boost_histogram/view.py @@ -4,8 +4,8 @@ import numpy as np -from ..accumulators import Mean, WeightedMean, WeightedSum -from ..typing import ArrayLike, StrIndex, Ufunc +from .accumulators import Mean, WeightedMean, WeightedSum +from .typing import ArrayLike, StrIndex, Ufunc UFMethod = Literal["__call__", "reduce", "reduceat", "accumulate", "outer", "at"] From fd6d3b8e9cb12a339aa7b9920e97936c2f1834d9 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Wed, 29 Jan 2025 16:37:21 -0500 Subject: [PATCH 09/16] chore: move utils out of _internal Signed-off-by: Henry Schreiner --- src/boost_histogram/_internal/hist.py | 2 +- src/boost_histogram/{_internal/utils.py => _utils.py} | 0 src/boost_histogram/axis/__init__.py | 2 +- src/boost_histogram/axis/transform.py | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename src/boost_histogram/{_internal/utils.py => _utils.py} (100%) diff --git a/src/boost_histogram/_internal/hist.py b/src/boost_histogram/_internal/hist.py index 18fd11a2..684d9339 100644 --- a/src/boost_histogram/_internal/hist.py +++ b/src/boost_histogram/_internal/hist.py @@ -27,12 +27,12 @@ import boost_histogram from boost_histogram import _core +from .._utils import cast, register, set_module from ..axis import AxesTuple, Axis, Variable from ..storage import Double, Storage from ..typing import Accumulator, ArrayLike, CppHistogram from ..view import MeanView, WeightedMeanView, WeightedSumView, _to_view from .enum import Kind -from .utils import cast, register, set_module if TYPE_CHECKING: from builtins import ellipsis diff --git a/src/boost_histogram/_internal/utils.py b/src/boost_histogram/_utils.py similarity index 100% rename from src/boost_histogram/_internal/utils.py rename to src/boost_histogram/_utils.py diff --git a/src/boost_histogram/axis/__init__.py b/src/boost_histogram/axis/__init__.py index 705bbde0..dffe8704 100644 --- a/src/boost_histogram/axis/__init__.py +++ b/src/boost_histogram/axis/__init__.py @@ -20,7 +20,7 @@ import boost_histogram from .._core import axis as ca -from .._internal.utils import cast, register, zip_strict +from .._utils import cast, register, zip_strict from . import transform from .transform import AxisTransform diff --git a/src/boost_histogram/axis/transform.py b/src/boost_histogram/axis/transform.py index ed7fa9ea..20dca369 100644 --- a/src/boost_histogram/axis/transform.py +++ b/src/boost_histogram/axis/transform.py @@ -6,7 +6,7 @@ import boost_histogram from .._core import axis as ca -from .._internal.utils import register +from .._utils import register T = TypeVar("T", bound="AxisTransform") From b94e58d5e1dd2466628e655ab8320d114dc5daef Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Wed, 29 Jan 2025 16:46:32 -0500 Subject: [PATCH 10/16] chore: remove rest of _internals Signed-off-by: Henry Schreiner --- docs/api/boost_histogram.rst | 14 +- pyproject.toml | 2 +- src/boost_histogram/__init__.py | 1259 ++++++++++++++++++++- src/boost_histogram/_internal/__init__.py | 0 src/boost_histogram/_internal/enum.py | 18 - src/boost_histogram/_internal/hist.py | 1237 -------------------- src/boost_histogram/_utils.py | 13 - src/boost_histogram/numpy.py | 10 +- 8 files changed, 1272 insertions(+), 1281 deletions(-) delete mode 100644 src/boost_histogram/_internal/__init__.py delete mode 100644 src/boost_histogram/_internal/enum.py delete mode 100644 src/boost_histogram/_internal/hist.py diff --git a/docs/api/boost_histogram.rst b/docs/api/boost_histogram.rst index cd3972f1..302bb372 100644 --- a/docs/api/boost_histogram.rst +++ b/docs/api/boost_histogram.rst @@ -42,10 +42,18 @@ boost\_histogram.tag :undoc-members: :show-inheritance: -boost\_histogram.version -======================== +boost\_histogram.typing +======================= + +.. automodule:: boost_histogram.typing + :members: + :undoc-members: + :show-inheritance: + +boost\_histogram.view +===================== -.. automodule:: boost_histogram.version +.. automodule:: boost_histogram.view :members: :undoc-members: :show-inheritance: diff --git a/pyproject.toml b/pyproject.toml index fd1a70d5..ee6f9990 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -239,7 +239,7 @@ ignore = [ "PT011", "PT013", # Incorrect pytest codes "ISC001", # Conflicts with the formatter ] -typing-modules = ["boost_histogram._internal.typing"] +typing-modules = ["boost_histogram.typing"] isort.required-imports = ["from __future__ import annotations"] diff --git a/src/boost_histogram/__init__.py b/src/boost_histogram/__init__.py index 093b0f92..0e80b765 100644 --- a/src/boost_histogram/__init__.py +++ b/src/boost_histogram/__init__.py @@ -1,8 +1,37 @@ from __future__ import annotations +import collections.abc +import copy +import logging +import threading +import typing +import warnings +from enum import Enum +from os import cpu_count +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Iterable, + List, + Mapping, + NewType, + SupportsIndex, + Tuple, + TypeVar, + Union, +) + +import numpy as np + +import boost_histogram +from boost_histogram import _core + from . import accumulators, axis, numpy, storage -from ._internal.enum import Kind -from ._internal.hist import Histogram, IndexingExpr +from ._utils import cast, register +from .axis import AxesTuple, Axis, Variable +from .storage import Double, Storage from .tag import ( # pylint: disable=redefined-builtin loc, overflow, @@ -10,6 +39,11 @@ sum, underflow, ) +from .typing import Accumulator, ArrayLike, CppHistogram +from .view import MeanView, WeightedMeanView, WeightedSumView, _to_view + +if TYPE_CHECKING: + from builtins import ellipsis # pylint: disable-next=import-error from .version import version as __version__ @@ -28,7 +62,21 @@ raise new_exception from err -__all__ = ( +# This is a StrEnum as defined in Python 3.10 +class Kind(str, Enum): + COUNT = "COUNT" + MEAN = "MEAN" + + # This cast + type ignore is really odd, so it deserves a quick + # explanation. If we just set this like StrEnum does, then mypy complains + # that the type is changing (str -> Kind). If we type: ignore, then + # MyPy claims that the type: ignore is not needed. If we cast, we get the + # same error as before. But if we cast and type: ignore, it now works. + # Will report to MyPy. Tested on 0.800. + __str__ = typing.cast(Callable[["Kind"], str], str.__str__) # type: ignore[assignment] + + +__all__ = [ "Histogram", "IndexingExpr", "Kind", @@ -42,7 +90,11 @@ "storage", "sum", "underflow", -) +] + + +def __dir__() -> list[str]: + return __all__ # Support cloudpickle - pybind11 submodules do not have __file__ attributes @@ -53,3 +105,1202 @@ _core.axis.transform.__file__ = _core.__file__ _core.hist.__file__ = _core.__file__ _core.storage.__file__ = _core.__file__ + + +NOTHING = object() + + +_histograms: set[type[CppHistogram]] = { + _core.hist.any_double, + _core.hist.any_int64, + _core.hist.any_atomic_int64, + _core.hist.any_unlimited, + _core.hist.any_weight, + _core.hist.any_mean, + _core.hist.any_weighted_mean, +} + +logger = logging.getLogger(__name__) + + +CppAxis = NewType("CppAxis", object) + +SimpleIndexing = Union[SupportsIndex, slice] +InnerIndexing = Union[SimpleIndexing, Callable[[Axis], int]] +FullInnerIndexing = Union[InnerIndexing, List[InnerIndexing]] +IndexingWithMapping = Union[FullInnerIndexing, Mapping[int, FullInnerIndexing]] +IndexingExpr = Union[IndexingWithMapping, Tuple[IndexingWithMapping, ...], "ellipsis"] + +T = TypeVar("T") + + +def _fill_cast( + value: T, *, inner: bool = False +) -> T | np.typing.NDArray[Any] | tuple[T, ...]: + """ + Convert to NumPy arrays. Some buffer objects do not get converted by forcecast. + If not called by itself (inner=False), then will work through one level of tuple/list. + """ + if value is None or isinstance(value, (str, bytes)): + return value + + if not inner and isinstance(value, (tuple, list)): + return tuple(_fill_cast(a, inner=True) for a in value) + + if hasattr(value, "__iter__") or hasattr(value, "__array__"): + return np.asarray(value) + + return value + + +def mean_storage_sample_check(sample: ArrayLike | None) -> None: + if sample is None: + raise TypeError("Sample key-argument (sample=) needs to be provided.") + seqs = (collections.abc.Sequence, np.ndarray) + msg1 = f"Sample key-argument needs to be a sequence, {sample.__class__.__name__} given." + if isinstance(sample, str) and not isinstance(sample, seqs): + raise ValueError(msg1) + sample_dim = np.array(sample).ndim + msg2 = f"Sample key-argument needs to be 1 dimensional, {sample_dim} given." + if sample_dim != 1: + raise ValueError(msg2) + + +def _arg_shortcut(item: tuple[int, float, float] | Axis | CppAxis) -> CppAxis: + if isinstance(item, tuple) and len(item) == 3: + msg = "Using () directly in constructor is a developer shortcut and will be removed in a future version" + warnings.warn(msg, FutureWarning, stacklevel=4) + return _core.axis.regular_uoflow(item[0], item[1], item[2]) # type: ignore[return-value] + + if isinstance(item, Axis): + return item._ax # type: ignore[no-any-return] + + raise TypeError("Only axes supported in histogram constructor") + + +def _expand_ellipsis(indexes: Iterable[Any], rank: int) -> list[Any]: + indexes = list(indexes) + number_ellipses = indexes.count(Ellipsis) + if number_ellipses == 0: + return indexes + if number_ellipses == 1: + index = indexes.index(Ellipsis) + additional = rank + 1 - len(indexes) + if additional < 0: + raise IndexError("too many indices for histogram") + + # Fill out the ellipsis with empty slices + return indexes[:index] + [slice(None)] * additional + indexes[index + 1 :] + + raise IndexError("an index can only have a single ellipsis ('...')") + + +H = TypeVar("H", bound="Histogram") + + +# We currently do not cast *to* a histogram, but this is consistent +# and could be used later. +@register(_histograms) # type: ignore[arg-type] +class Histogram: + # Note this is a __slots__ __dict__ class! + __slots__ = ( + "__dict__", + "_hist", + "axes", + ) + # .metadata and ._variance_known are part of the dict + + _family: ClassVar[object] = boost_histogram + + axes: AxesTuple + _hist: CppHistogram + _variance_known: bool + + def __init_subclass__(cls, *, family: object | None = None) -> None: + """ + Sets the family for the histogram. This should be a unique object (such + as the main module of your package) that is consistently set across all + subclasses. When converting back from C++, casting will try to always + pick the best matching family from the loaded subclasses for Axis and + such. + """ + super().__init_subclass__() + cls._family = family if family is not None else object() + + @typing.overload + def __init__(self, *args: Histogram) -> None: ... + + @typing.overload + def __init__(self, *args: CppHistogram, metadata: Any = ...) -> None: ... + + @typing.overload + def __init__( + self, + *axes: Axis | CppAxis, + storage: Storage = ..., + metadata: Any = ..., + ) -> None: ... + + def __init__( + self, + *axes: Axis | CppAxis | Histogram | CppHistogram, + storage: Storage = Double(), # noqa: B008 + metadata: Any = None, + ) -> None: + """ + Construct a new histogram. + + If you pass in a single argument, this will be treated as a + histogram and this will convert the histogram to this type of + histogram. + + Parameters + ---------- + *args : Axis + Provide 1 or more axis instances. + storage : Storage = bh.storage.Double() + Select a storage to use in the histogram + metadata : Any = None + Data that is passed along if a new histogram is created + """ + self._variance_known = True + + # Allow construction from a raw histogram object (internal) + if len(axes) == 1 and isinstance(axes[0], tuple(_histograms)): + cpp_hist: CppHistogram = axes[0] # type: ignore[assignment] + self._from_histogram_cpp(cpp_hist) + if metadata: + self.metadata = metadata + return + + # If we construct with another Histogram as the only positional argument, + # support that too + if len(axes) == 1 and isinstance(axes[0], Histogram): + normal_hist: Histogram = axes[0] + self._from_histogram_object(normal_hist) + if metadata: + self.metadata = metadata + return + + # Support objects that provide a to_boost method, like Uproot + if len(axes) == 1 and hasattr(axes[0], "_to_boost_histogram_"): + self._from_histogram_object(axes[0]._to_boost_histogram_()) + return + + if storage is None: + storage = Double() # type: ignore[unreachable] + + self.metadata = metadata + + # Check for missed parenthesis or incorrect types + if not isinstance(storage, Storage): + msg_storage = ( # type: ignore[unreachable] + "Passing in an initialized storage has been removed. Please add ()." + ) + msg_unknown = "Only storages allowed in storage argument" + raise KeyError(msg_storage if issubclass(storage, Storage) else msg_unknown) + + # Allow a tuple to represent a regular axis + axes = tuple(_arg_shortcut(arg) for arg in axes) # type: ignore[arg-type] + + if len(axes) > _core.hist._axes_limit: + msg = f"Too many axes, must be less than {_core.hist._axes_limit}" + raise IndexError(msg) + + # Check all available histograms, and if the storage matches, return that one + for h in _histograms: + if isinstance(storage, h._storage_type): + self._hist = h(axes, storage) # type: ignore[arg-type] + self.axes = self._generate_axes_() + return + + raise TypeError("Unsupported storage") + + @classmethod + def _clone( + cls: type[H], + _hist: Histogram | CppHistogram, + *, + other: Histogram | None = None, + memo: Any = NOTHING, + ) -> H: + """ + Clone a histogram (possibly of a different base). Does not trigger __init__. + This will copy data from `other=` if non-None, otherwise metadata gets copied from the input. + """ + + self = cls.__new__(cls) + if isinstance(_hist, tuple(_histograms)): + self._from_histogram_cpp(_hist) # type: ignore[arg-type] + if other is not None: + return cls._clone(self, other=other, memo=memo) + return self + + assert isinstance(_hist, Histogram) + + if other is None: + other = _hist + + self._from_histogram_object(_hist) + + if memo is NOTHING: + self.__dict__ = copy.copy(other.__dict__) + else: + self.__dict__ = copy.deepcopy(other.__dict__, memo) + + for ax in self.axes: + if memo is NOTHING: + ax.__dict__ = copy.copy(ax._ax.metadata) + else: + ax.__dict__ = copy.deepcopy(ax._ax.metadata, memo) + return self + + def _new_hist(self: H, _hist: CppHistogram, memo: Any = NOTHING) -> H: + """ + Return a new histogram given a new _hist, copying current metadata. + """ + return self.__class__._clone(_hist, other=self, memo=memo) + + def _from_histogram_cpp(self, other: CppHistogram) -> None: + """ + Import a Cpp histogram. + """ + self._variance_known = True + self._hist = other + self.metadata = None + self.axes = self._generate_axes_() + + def _from_histogram_object(self, other: Histogram) -> None: + """ + Convert self into a new histogram object based on another, possibly + converting from a different subclass. + """ + self._hist = other._hist + self.__dict__ = copy.copy(other.__dict__) + self.axes = self._generate_axes_() + for ax in self.axes: + ax.__dict__ = copy.copy(ax._ax.metadata) + + # Allow custom behavior on either "from" or "to" + other._export_bh_(self) + self._import_bh_() + + def _import_bh_(self) -> None: + """ + If any post-processing is needed to pass a histogram between libraries, a + subclass can implement it here. self is the new instance in the current + (converted-to) class. + """ + + @classmethod + def _export_bh_(cls, self: Histogram) -> None: + """ + If any preparation is needed to pass a histogram between libraries, a subclass can + implement it here. cls is the current class being converted from, and self is the + instance in the class being converted to. + """ + + def _generate_axes_(self) -> AxesTuple: + """ + This is called to fill in the axes. Subclasses can override it if they need + to change the axes tuple. + """ + + return AxesTuple(self._axis(i) for i in range(self.ndim)) + + @property + def ndim(self) -> int: + """ + Number of axes (dimensions) of the histogram. + """ + return self._hist.rank() + + def view( + self, flow: bool = False + ) -> np.typing.NDArray[Any] | WeightedSumView | WeightedMeanView | MeanView: + """ + Return a view into the data, optionally with overflow turned on. + """ + return _to_view(self._hist.view(flow)) + + def __array__( + self, + dtype: np.typing.DTypeLike | None = None, + *, + # pylint: disable-next=redefined-outer-name + copy: bool | None = None, + ) -> np.typing.NDArray[Any]: + # The copy kw is new in NumPy 2.0 + kwargs = {} + if copy is not None: + kwargs["copy"] = copy + return np.asarray(self.view(False), dtype=dtype, **kwargs) # type: ignore[call-overload] + + def __eq__(self, other: Any) -> bool: + return hasattr(other, "_hist") and self._hist == other._hist + + def __ne__(self, other: Any) -> bool: + return (not hasattr(other, "_hist")) or self._hist != other._hist + + def __add__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + result = self.copy(deep=False) + return result.__iadd__(other) + + def __iadd__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + if isinstance(other, (int, float)) and other == 0: + return self + self._compute_inplace_op("__iadd__", other) + + # Addition may change the axes if they can grow + self.axes = self._generate_axes_() + + return self + + def __radd__(self: H, other: np.typing.NDArray[Any] | float) -> H: + return self + other + + def __sub__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + result = self.copy(deep=False) + return result.__isub__(other) + + def __isub__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + if isinstance(other, (int, float)) and other == 0: + return self + self._compute_inplace_op("__isub__", other) + + self.axes = self._generate_axes_() + + return self + + # If these fail, the underlying object throws the correct error + def __mul__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + result = self.copy(deep=False) + return result._compute_inplace_op("__imul__", other) + + def __rmul__(self: H, other: np.typing.NDArray[Any] | float) -> H: + return self * other + + def __truediv__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + result = self.copy(deep=False) + return result._compute_inplace_op("__itruediv__", other) + + def __div__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + result = self.copy(deep=False) + return result._compute_inplace_op("__idiv__", other) + + def __idiv__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + return self._compute_inplace_op("__idiv__", other) + + def __itruediv__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + return self._compute_inplace_op("__itruediv__", other) + + def __imul__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + return self._compute_inplace_op("__imul__", other) + + def _compute_inplace_op( + self: H, name: str, other: Histogram | np.typing.NDArray[Any] | float + ) -> H: + # Also takes CppHistogram, but that confuses mypy because it's hard to pick out + if isinstance(other, Histogram): + getattr(self._hist, name)(other._hist) + elif isinstance(other, tuple(_histograms)): + getattr(self._hist, name)(other) + elif hasattr(other, "shape") and other.shape: + assert not isinstance(other, float) + + if len(other.shape) != self.ndim: + msg = f"Number of dimensions {len(other.shape)} must match histogram {self.ndim}" + raise ValueError(msg) + + if all(a in {b, 1} for a, b in zip(other.shape, self.shape)): + view = self.view(flow=False) + getattr(view, name)(other) + elif all(a in {b, 1} for a, b in zip(other.shape, self.axes.extent)): + view = self.view(flow=True) + getattr(view, name)(other) + else: + msg = f"Wrong shape {other.shape}, expected {self.shape} or {self.axes.extent}" + raise ValueError(msg) + else: + view = self.view(flow=True) + getattr(view, name)(other) + + self._variance_known = False + return self + + # TODO: Marked as too complex by flake8. Should be factored out a bit. + def fill( + self: H, + *args: ArrayLike | str, + weight: ArrayLike | None = None, + sample: ArrayLike | None = None, + threads: int | None = None, + ) -> H: + """ + Insert data into the histogram. + + Parameters + ---------- + *args : Union[Array[float], Array[int], Array[str], float, int, str] + Provide one value or array per dimension. + weight : List[Union[Array[float], Array[int], float, int, str]]] + Provide weights (only if the histogram storage supports it) + sample : List[Union[Array[float], Array[int], Array[str], float, int, str]]] + Provide samples (only if the histogram storage supports it) + threads : Optional[int] + Fill with threads. Defaults to None, which does not activate + threaded filling. Using 0 will automatically pick the number of + available threads (usually two per core). + """ + + if self._hist._storage_type is _core.storage.mean: + mean_storage_sample_check(sample) + + if ( + self._hist._storage_type + not in { + _core.storage.weight, + _core.storage.mean, + _core.storage.weighted_mean, + } + and weight is not None + ): + self._variance_known = False + + # Convert to NumPy arrays + args_ars = _fill_cast(args) + weight_ars = _fill_cast(weight) + sample_ars = _fill_cast(sample) + + if threads == 0: + threads = cpu_count() + + if threads is None or threads == 1: + self._hist.fill(*args_ars, weight=weight_ars, sample=sample_ars) + return self + + if self._hist._storage_type in { + _core.storage.mean, + _core.storage.weighted_mean, + }: + raise RuntimeError("Mean histograms do not support threaded filling") + + data: list[list[np.typing.NDArray[Any]] | list[str]] = [ + np.array_split(a, threads) if not isinstance(a, str) else [a] * threads + for a in args_ars + ] + + weights: list[Any] + if weight is None or np.isscalar(weight): + assert threads is not None + weights = [weight_ars] * threads + else: + weights = np.array_split(weight_ars, threads) + + samples: list[Any] + if sample_ars is None or np.isscalar(sample_ars): + assert threads is not None + samples = [sample_ars] * threads + else: + samples = np.array_split(sample_ars, threads) + + if self._hist._storage_type is _core.storage.atomic_int64: + + def fun( + weight: ArrayLike | None, + sample: ArrayLike | None, + *args: np.typing.NDArray[Any], + ) -> None: + self._hist.fill(*args, weight=weight, sample=sample) + + else: + sum_lock = threading.Lock() + + def fun( + weight: ArrayLike | None, + sample: ArrayLike | None, + *args: np.typing.NDArray[Any], + ) -> None: + local_hist = copy.copy(self._hist) + local_hist.reset() + local_hist.fill(*args, weight=weight, sample=sample) + with sum_lock: + self._hist += local_hist + + thread_list = [ + threading.Thread(target=fun, args=arrays) + for arrays in zip(weights, samples, *data) + ] + + for thread in thread_list: + thread.start() + + for thread in thread_list: + thread.join() + + return self + + def __str__(self) -> str: + """ + A rendering of the histogram is made using ASCII or unicode characters + (whatever is supported by the terminal). What exactly is displayed is + still experimental. Do not rely on any particular rendering. + """ + # TODO check the terminal width and adjust the presentation + # only use for 1D, fall back to repr for ND + if self._hist.rank() != 1: + return repr(self) + s = str(self._hist) + # get rid of first line and last character + return s[s.index("\n") + 1 : -1] + + def _axis(self, i: int = 0) -> Axis: + """ + Get N-th axis. + """ + return cast(self, self._hist.axis(i), Axis) + + @property + def storage_type(self) -> type[Storage]: + return cast(self, self._hist._storage_type, Storage) # type: ignore[return-value] + + @property + def _storage_type(self) -> type[Storage]: + warnings.warn( + "Accessing storage type has changed from _storage_type to storage_type, and will be removed in future.", + DeprecationWarning, + stacklevel=2, + ) + return cast(self, self._hist._storage_type, Storage) # type: ignore[return-value] + + def _reduce(self: H, *args: Any) -> H: + return self._new_hist(self._hist.reduce(*args)) + + def __copy__(self: H) -> H: + return self._new_hist(copy.copy(self._hist)) + + def __deepcopy__(self: H, memo: Any) -> H: + return self._new_hist(copy.deepcopy(self._hist), memo=memo) + + def __getstate__(self) -> tuple[int, dict[str, Any]]: + """ + Version 0.8: metadata added + Version 0.11: version added and set to 0. metadata/_hist replaced with dict. + Version 0.12: _variance_known is now in the dict (no format change) + + ``dict`` contains __dict__ with added "_hist" + """ + local_dict = copy.copy(self.__dict__) + local_dict["_hist"] = self._hist + # Version 0 of boost-histogram pickle state + return (0, local_dict) + + def __setstate__(self, state: Any) -> None: + if isinstance(state, tuple): + if state[0] == 0: + for key, value in state[1].items(): + setattr(self, key, value) + + # Added in 0.12 + if "_variance_known" not in state[1]: + self._variance_known = True + else: + msg = f"Cannot open boost-histogram pickle v{state[0]}" + raise RuntimeError(msg) + + else: # Classic (0.10 and before) state + self._hist = state["_hist"] + self._variance_known = True + self.metadata = state.get("metadata", None) + for i in range(self._hist.rank()): + self._hist.axis(i).metadata = {"metadata": self._hist.axis(i).metadata} + + self.axes = self._generate_axes_() + + def __repr__(self) -> str: + newline = "\n " + first_newline = newline if len(self.axes) > 1 else "" + storage_newline = ( + newline if len(self.axes) > 1 else " " if len(self.axes) > 0 else "" + ) + sep = "," if len(self.axes) > 0 else "" + ret = f"{self.__class__.__name__}({first_newline}" + ret += f",{newline}".join(repr(ax) for ax in self.axes) + ret += f"{sep}{storage_newline}storage={self.storage_type()}" # pylint: disable=not-callable + ret += ")" + outer = self.sum(flow=True) + if outer: + inner = self.sum(flow=False) + ret += f" # Sum: {inner}" + if inner != outer: + ret += f" ({outer} with flow)" + return ret + + def _compute_uhi_index(self, index: InnerIndexing, axis: int) -> SimpleIndexing: + """ + Converts an expression that contains UHI locators to one that does not. + """ + # Support sum and rebin directly + if index is sum or hasattr(index, "factor"): # type: ignore[comparison-overlap] + return slice(None, None, index) + + # General locators + # Note that MyPy doesn't like these very much - the fix + # will be to properly set input types + if callable(index): + return index(self.axes[axis]) + + if isinstance(index, float): + raise TypeError(f"Index {index} must be an integer, not float") + + if isinstance(index, SupportsIndex): + if abs(int(index)) >= self._hist.axis(axis).size: + raise IndexError("histogram index is out of range") + return int(index) % self._hist.axis(axis).size + + return index + + def _compute_commonindex( + self, index: IndexingExpr + ) -> list[SupportsIndex | slice | Mapping[int, SupportsIndex | slice]]: + """ + Takes indices and returns two iterables; one is a tuple or dict of the + original, Ellipsis expanded index, and the other returns index, + operation value pairs. + """ + indexes: list[Any] + + # Shorten the computations with direct access to raw object + hist = self._hist + + # Support dict access + if hasattr(index, "items"): + indexes = [slice(None)] * hist.rank() + for k, v in index.items(): + indexes[k] = v + + # Normalize -> h[i] == h[i,] + else: + tuple_index = (index,) if not isinstance(index, tuple) else index + + # Now a list + indexes = _expand_ellipsis(tuple_index, hist.rank()) + + if len(indexes) != hist.rank(): + raise IndexError("Wrong number of indices for histogram") + + # Allow [bh.loc(...)] to work + # TODO: could be nicer making a new list via a comprehension + for i in range(len(indexes)): # pylint: disable=consider-using-enumerate + # Support list of UHI indexers + if isinstance(indexes[i], list): + indexes[i] = [self._compute_uhi_index(ind, i) for ind in indexes[i]] + else: + indexes[i] = self._compute_uhi_index(indexes[i], i) + + return indexes + + def to_numpy( + self, flow: bool = False, *, dd: bool = False, view: bool = False + ) -> ( + tuple[np.typing.NDArray[Any], ...] + | tuple[np.typing.NDArray[Any], tuple[np.typing.NDArray[Any], ...]] + ): + """ + Convert to a NumPy style tuple of return arrays. Edges are converted to + match NumPy standards, with upper edge inclusive, unlike + boost-histogram, where upper edge is exclusive. + + Parameters + ---------- + flow : bool = False + Include the flow bins. + dd : bool = False + Use the histogramdd return syntax, where the edges are in a tuple. + Otherwise, this is the histogram/histogram2d return style. + view : bool = False + The behavior for the return value. By default, this will return + array of the values only regardless of the storage (which is all + NumPy's histogram function can do). view=True will return the + boost-histogram view of the storage. + + Return + ------ + contents : Array[Any] + The bin contents + *edges : Array[float] + The edges for each dimension + """ + + hist, *edges = self._hist.to_numpy(flow) + hist = self.view(flow=flow) if view else self.values(flow=flow) + + return (hist, edges) if dd else (hist, *edges) + + def copy(self: H, *, deep: bool = True) -> H: + """ + Make a copy of the histogram. Defaults to making a + deep copy (axis metadata copied); use deep=False + to avoid making a copy of axis metadata. + """ + + return copy.deepcopy(self) if deep else copy.copy(self) + + def reset(self: H) -> H: + """ + Clear the bin counters. + """ + self._hist.reset() + return self + + def empty(self, flow: bool = False) -> bool: + """ + Check to see if the histogram has any non-default values. + You can use flow=True to check flow bins too. + """ + return self._hist.empty(flow) + + def sum(self, flow: bool = False) -> float | Accumulator: + """ + Compute the sum over the histogram bins (optionally including the flow bins). + """ + return self._hist.sum(flow) # type: ignore[no-any-return] + + @property + def size(self) -> int: + """ + Total number of bins in the histogram (including underflow/overflow). + """ + return self._hist.size() + + @property + def shape(self) -> tuple[int, ...]: + """ + Tuple of axis sizes (not including underflow/overflow). + """ + return self.axes.size + + # TODO: Marked as too complex by flake8. Should be factored out a bit. + def __getitem__(self: H, index: IndexingExpr) -> H | float | Accumulator: + indexes = self._compute_commonindex(index) + + # If this is (now) all integers, return the bin contents + # But don't try *dict! + if not hasattr(indexes, "items") and all( + isinstance(a, SupportsIndex) for a in indexes + ): + return self._hist.at(*indexes) # type: ignore[no-any-return, arg-type] + + integrations: set[int] = set() + slices: list[_core.algorithm.reduce_command] = [] + pick_each: dict[int, int] = {} + pick_set: dict[int, list[int]] = {} + reduced: CppHistogram | None = None + + # Compute needed slices and projections + for i, ind in enumerate(indexes): + if isinstance(ind, SupportsIndex): + pick_each[i] = ind.__index__() + ( + 1 if self.axes[i].traits.underflow else 0 + ) + continue + + if isinstance(ind, collections.abc.Sequence): + pick_set[i] = list(ind) + continue + + if not isinstance(ind, slice): + raise IndexError( + "Must be a slice, an integer, or follow the locator protocol." + ) + + # If the dictionary brackets are forgotten, it's easy to put a slice + # into a slice - adding a nicer error message in that case + if any(isinstance(v, slice) for v in (ind.start, ind.stop, ind.step)): + raise TypeError( + "You have put a slice in a slice. Did you forget curly braces [{...}]?" + ) + + # This ensures that callable start/stop are handled + start, stop = self.axes[i]._process_loc(ind.start, ind.stop) + + groups = [] + if ind != slice(None): + merge = 1 + if ind.step is not None: + if getattr(ind.step, "factor", None) is not None: + merge = ind.step.factor + elif ( + hasattr(ind.step, "group_mapping") + and (tmp_groups := ind.step.group_mapping(self.axes[i])) + is not None + ): + groups = tmp_groups + elif callable(ind.step): + if ind.step is sum: + integrations.add(i) + else: + raise NotImplementedError + + if ind.start is not None or ind.stop is not None: + slices.append( + _core.algorithm.slice( + i, start, stop, _core.algorithm.slice_mode.crop + ) + ) + if len(groups) == 0: + continue + else: + raise IndexError( + "The third argument to a slice must be rebin or projection" + ) + + assert isinstance(start, int) + assert isinstance(stop, int) + # rebinning with factor + if len(groups) == 0: + slices.append( + _core.algorithm.slice_and_rebin(i, start, stop, merge) + ) + # rebinning with groups + elif len(groups) != 0: + if not reduced: + reduced = self._hist + axes = [reduced.axis(x) for x in range(reduced.rank())] + reduced_view = reduced.view(flow=True) + new_axes_indices = [axes[i].edges[0]] + + j = 0 + for group in groups: + new_axes_indices += [axes[i].edges[j + group]] + j += group + + variable_axis = Variable( + new_axes_indices, metadata=axes[i].metadata + ) + axes[i] = variable_axis._ax + + logger.debug("Axes: %s", axes) + + new_reduced = reduced.__class__(axes) + new_view = new_reduced.view(flow=True) + + j = 1 + for new_j, group in enumerate(groups): + for _ in range(group): + pos = [slice(None)] * (i) + new_view[(*pos, new_j + 1, ...)] += _to_view( + reduced_view[(*pos, j, ...)] + ) + j += 1 + + reduced = new_reduced + + # Will be updated below + if (slices or pick_set or pick_each or integrations) and not reduced: + reduced = self._hist + elif not reduced: + reduced = copy.copy(self._hist) + + if pick_each: + tuple_slice = tuple( + pick_each.get(i, slice(None)) for i in range(reduced.rank()) + ) + logger.debug("Slices for pick each: %s", tuple_slice) + axes = [ + reduced.axis(i) for i in range(reduced.rank()) if i not in pick_each + ] + logger.debug("Axes: %s", axes) + new_reduced = reduced.__class__(axes) + new_reduced.view(flow=True)[...] = reduced.view(flow=True)[tuple_slice] + reduced = new_reduced + integrations = {i - sum(j <= i for j in pick_each) for i in integrations} + pick_set = { + i - sum(j <= i for j in pick_each): v for i, v in pick_set.items() + } + for slice_ in slices: + slice_.iaxis -= sum(j <= slice_.iaxis for j in pick_each) + + if slices: + logger.debug("Reduce with %s", slices) + reduced = reduced.reduce(*slices) + + if pick_set: + warnings.warn( + "List indexing selection is experimental. Removed bins are not placed in overflow.", + stacklevel=2, + ) + logger.debug("Slices for picking sets: %s", pick_set) + axes = [reduced.axis(i) for i in range(reduced.rank())] + reduced_view = reduced.view(flow=True) + for i in pick_set: # pylint: disable=consider-using-dict-items + selection = copy.copy(pick_set[i]) + ax = reduced.axis(i) + if ax.traits_ordered: + msg = f"Axis {i} is not a categorical axis, cannot pick with list: {ax}" + raise RuntimeError(msg) + + if ax.traits_overflow and ax.size not in pick_set[i]: + selection.append(ax.size) + + new_axis = axes[i].__class__([axes[i].value(j) for j in pick_set[i]]) # type: ignore[call-arg] + new_axis.metadata = axes[i].metadata + axes[i] = new_axis + reduced_view = np.take(reduced_view, selection, axis=i) + + logger.debug("Axes: %s", axes) + new_reduced = reduced.__class__(axes) + new_reduced.view(flow=True)[...] = reduced_view + reduced = new_reduced + + if integrations: + projections = [i for i in range(reduced.rank()) if i not in integrations] + reduced = reduced.project(*projections) + + return self._new_hist(reduced) if reduced.rank() > 0 else reduced.sum(flow=True) + + def __setitem__(self, index: IndexingExpr, value: ArrayLike | Accumulator) -> None: + """ + There are several supported possibilities: + + h[slice] = array # same size + + If an array is given to a compatible slice, it is set. + + h[a:] = array # One larger + + If an array is given that does not match, if it does match the + with-overflow size, it fills that. + + PLANNED (not yet supported): + + h[a:] = h2 + + If another histogram is given, that must either match with or without + overflow, where the overflow bins must be overflow bins (that is, + you cannot set a histogram's flow bins from another histogram that + is 2 larger). Bin edges must be a close match, as well. If you don't + want this level of type safety, just use ``h[...] = h2.view()``. + """ + indexes = self._compute_commonindex(index) + + if isinstance(value, Histogram): + raise TypeError("Not supported yet") + + value = np.asarray(value) + view = self.view(flow=True) + + value_shape: tuple[int, ...] + # Support raw arrays for accumulators, the final dimension is the constructor values + if ( + value.ndim > 0 + and len(view.dtype) > 0 + and len(value.dtype) == 0 + and len(view.dtype) == value.shape[-1] + ): + value_shape = value.shape[:-1] + value_ndim = value.ndim - 1 + else: + value_shape = value.shape + value_ndim = value.ndim + + # NumPy does not broadcast partial slices, but we would need + # to allow it (because we do allow broadcasting up dimensions) + # Instead, we simply require matching dimensions. + if value_ndim > 0 and value_ndim != sum(isinstance(i, slice) for i in indexes): + msg = f"Setting a {len(indexes)}D histogram with a {value_ndim}D array must have a matching number of dimensions" + raise ValueError(msg) + + # Here, value_n does not increment with n if this is not a slice + value_n = 0 + for n, request in enumerate(indexes): + has_underflow = self.axes[n].traits.underflow + has_overflow = self.axes[n].traits.overflow + + if isinstance(request, slice): + # Only consider underflow/overflow if the endpoints are not given + use_underflow = has_underflow and request.start is None + use_overflow = has_overflow and request.stop is None + + # Make the limits explicit since we may need to shift them + start = 0 if request.start is None else request.start + stop = len(self.axes[n]) if request.stop is None else request.stop + request_len = stop - start + + # If set to a scalar, then treat it like broadcasting without flow bins + if value_ndim == 0: + start = 0 + has_overflow + stop = len(self.axes[n]) + has_underflow + + # Normal setting + elif request_len == value_shape[value_n]: + start += has_underflow + stop += has_underflow + + # Expanded setting + elif request_len + use_underflow + use_overflow == value_shape[value_n]: + start += has_underflow and not use_underflow + stop += has_underflow + (has_overflow and use_overflow) + + # Single element broadcasting + elif value_shape[value_n] == 1: + start += has_underflow + stop += has_underflow + + else: + msg = f"Mismatched shapes in dimension {n}" + msg += f", {value_shape[n]} != {request_len}" + if use_underflow or use_overflow: + msg += f" or {request_len + use_underflow + use_overflow}" + raise ValueError(msg) + indexes[n] = slice(start, stop, request.step) + value_n += 1 + else: + indexes[n] = request + has_underflow + + view[tuple(indexes)] = value # type: ignore[arg-type] + + def project(self: H, *args: int) -> H | float | Accumulator: + """ + Project to a single axis or several axes on a multidimensional histogram. + Provided a list of axis numbers, this will produce the histogram over + those axes only. Flow bins are used if available. + """ + for arg in args: + if arg < 0 or arg >= self.ndim: + raise ValueError( + f"Projection axis must be a valid axis number 0 to {self.ndim - 1}, not {arg}" + ) + + return self._new_hist(self._hist.project(*args)) + + # Implementation of PlottableHistogram + + @property + def kind(self) -> Kind: + """ + Returns Kind.COUNT if this is a normal summing histogram, and Kind.MEAN if this is a + mean histogram. + + :return: Kind + """ + mean = self._hist._storage_type in { + _core.storage.mean, + _core.storage.weighted_mean, + } + + return Kind.MEAN if mean else Kind.COUNT + + def values(self, flow: bool = False) -> np.typing.NDArray[Any]: + """ + Returns the accumulated values. The counts for simple histograms, the + sum of weights for weighted histograms, the mean for profiles, etc. + + If counts is equal to 0, the value in that cell is undefined if + kind == "MEAN". + + :param flow: Enable flow bins. Not part of PlottableHistogram, but + included for consistency with other methods and flexibility. + + :return: "np.typing.NDArray[Any]"[np.float64] + """ + + view = self.view(flow) + # TODO: Might be a NumPy typing bug + if len(view.dtype) == 0: + return view + return view.value + + def variances(self, flow: bool = False) -> np.typing.NDArray[Any] | None: + """ + Returns the estimated variance of the accumulated values. The sum of squared + weights for weighted histograms, the variance of samples for profiles, etc. + For an unweighed histogram where kind == "COUNT", this should return the same + as values if the histogram was not filled with weights, and None otherwise. + If counts is equal to 1 or less, the variance in that cell is undefined if + kind == "MEAN". This must be written <= 1, and not < 2; when this + effective counts (weighed mean), then counts could be less than 2 but + more than 1. + + If kind == "MEAN", the counts can be used to compute the error on the mean + as sqrt(variances / counts), this works whether or not the entries are + weighted if the weight variance was tracked by the implementation. + + Currently, this always returns - but in the future, it will return None + if a weighted fill is made on a unweighed storage. + + :param flow: Enable flow bins. Not part of PlottableHistogram, but + included for consistency with other methods and flexibility. + + :return: "np.typing.NDArray[Any]"[np.float64] + """ + + view = self.view(flow) + if len(view.dtype) == 0: + return view if self._variance_known else None + + if hasattr(view, "sum_of_weights"): + valid = view.sum_of_weights**2 > view.sum_of_weights_squared # type: ignore[union-attr] + return np.divide( + view.variance, + view.sum_of_weights, + out=np.full(view.sum_of_weights.shape, np.nan), + where=valid, + ) + + if hasattr(view, "count"): + return np.divide( + view.variance, + view.count, + out=np.full(view.count.shape, np.nan), + where=view.count > 1, + ) + + return view.variance + + def counts(self, flow: bool = False) -> np.typing.NDArray[Any]: + """ + Returns the number of entries in each bin for an unweighted + histogram or profile and an effective number of entries (defined below) + for a weighted histogram or profile. An exotic generalized histogram could + have no sensible .counts, so this is Optional and should be checked by + Consumers. + + If kind == "MEAN", counts (effective or not) can and should be used to + determine whether the mean value and its variance should be displayed + (see documentation of values and variances, respectively). The counts + should also be used to compute the error on the mean (see documentation + of variances). + + For a weighted histogram, counts is defined as sum_of_weights ** 2 / + sum_of_weights_squared. It is equal or less than the number of times + the bin was filled, the equality holds when all filled weights are equal. + The larger the spread in weights, the smaller it is, but it is always 0 + if filled 0 times, and 1 if filled once, and more than 1 otherwise. + + :return: "np.typing.NDArray[Any]"[np.float64] + """ + + view = self.view(flow) + + if len(view.dtype) == 0: + return view + + if hasattr(view, "sum_of_weights"): + return np.divide( + view.sum_of_weights**2, + view.sum_of_weights_squared, # type: ignore[union-attr] + out=np.zeros_like(view.sum_of_weights, dtype=np.float64), + where=view.sum_of_weights_squared != 0, # type: ignore[union-attr] + ) + + if hasattr(view, "count"): + return view.count + + return view.value + + +if TYPE_CHECKING: + from uhi.typing.plottable import PlottableHistogram + + _: PlottableHistogram = typing.cast(Histogram, None) diff --git a/src/boost_histogram/_internal/__init__.py b/src/boost_histogram/_internal/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/boost_histogram/_internal/enum.py b/src/boost_histogram/_internal/enum.py deleted file mode 100644 index f2f8dbb6..00000000 --- a/src/boost_histogram/_internal/enum.py +++ /dev/null @@ -1,18 +0,0 @@ -from __future__ import annotations - -from enum import Enum -from typing import Callable, cast - - -# This is a StrEnum as defined in Python 3.10 -class Kind(str, Enum): - COUNT = "COUNT" - MEAN = "MEAN" - - # This cast + type ignore is really odd, so it deserves a quick - # explanation. If we just set this like StrEnum does, then mypy complains - # that the type is changing (str -> Kind). If we type: ignore, then - # MyPy claims that the type: ignore is not needed. If we cast, we get the - # same error as before. But if we cast and type: ignore, it now works. - # Will report to MyPy. Tested on 0.800. - __str__ = cast(Callable[["Kind"], str], str.__str__) # type: ignore[assignment] diff --git a/src/boost_histogram/_internal/hist.py b/src/boost_histogram/_internal/hist.py deleted file mode 100644 index 684d9339..00000000 --- a/src/boost_histogram/_internal/hist.py +++ /dev/null @@ -1,1237 +0,0 @@ -from __future__ import annotations - -import collections.abc -import copy -import logging -import threading -import typing -import warnings -from os import cpu_count -from typing import ( - TYPE_CHECKING, - Any, - Callable, - ClassVar, - Iterable, - List, - Mapping, - NewType, - SupportsIndex, - Tuple, - TypeVar, - Union, -) - -import numpy as np - -import boost_histogram -from boost_histogram import _core - -from .._utils import cast, register, set_module -from ..axis import AxesTuple, Axis, Variable -from ..storage import Double, Storage -from ..typing import Accumulator, ArrayLike, CppHistogram -from ..view import MeanView, WeightedMeanView, WeightedSumView, _to_view -from .enum import Kind - -if TYPE_CHECKING: - from builtins import ellipsis - -NOTHING = object() - - -_histograms: set[type[CppHistogram]] = { - _core.hist.any_double, - _core.hist.any_int64, - _core.hist.any_atomic_int64, - _core.hist.any_unlimited, - _core.hist.any_weight, - _core.hist.any_mean, - _core.hist.any_weighted_mean, -} - -logger = logging.getLogger(__name__) - - -CppAxis = NewType("CppAxis", object) - -SimpleIndexing = Union[SupportsIndex, slice] -InnerIndexing = Union[SimpleIndexing, Callable[[Axis], int]] -FullInnerIndexing = Union[InnerIndexing, List[InnerIndexing]] -IndexingWithMapping = Union[FullInnerIndexing, Mapping[int, FullInnerIndexing]] -IndexingExpr = Union[IndexingWithMapping, Tuple[IndexingWithMapping, ...], "ellipsis"] - -T = TypeVar("T") - - -def _fill_cast( - value: T, *, inner: bool = False -) -> T | np.typing.NDArray[Any] | tuple[T, ...]: - """ - Convert to NumPy arrays. Some buffer objects do not get converted by forcecast. - If not called by itself (inner=False), then will work through one level of tuple/list. - """ - if value is None or isinstance(value, (str, bytes)): - return value - - if not inner and isinstance(value, (tuple, list)): - return tuple(_fill_cast(a, inner=True) for a in value) - - if hasattr(value, "__iter__") or hasattr(value, "__array__"): - return np.asarray(value) - - return value - - -def mean_storage_sample_check(sample: ArrayLike | None) -> None: - if sample is None: - raise TypeError("Sample key-argument (sample=) needs to be provided.") - seqs = (collections.abc.Sequence, np.ndarray) - msg1 = f"Sample key-argument needs to be a sequence, {sample.__class__.__name__} given." - if isinstance(sample, str) and not isinstance(sample, seqs): - raise ValueError(msg1) - sample_dim = np.array(sample).ndim - msg2 = f"Sample key-argument needs to be 1 dimensional, {sample_dim} given." - if sample_dim != 1: - raise ValueError(msg2) - - -def _arg_shortcut(item: tuple[int, float, float] | Axis | CppAxis) -> CppAxis: - if isinstance(item, tuple) and len(item) == 3: - msg = "Using () directly in constructor is a developer shortcut and will be removed in a future version" - warnings.warn(msg, FutureWarning, stacklevel=4) - return _core.axis.regular_uoflow(item[0], item[1], item[2]) # type: ignore[return-value] - - if isinstance(item, Axis): - return item._ax # type: ignore[no-any-return] - - raise TypeError("Only axes supported in histogram constructor") - - -def _expand_ellipsis(indexes: Iterable[Any], rank: int) -> list[Any]: - indexes = list(indexes) - number_ellipses = indexes.count(Ellipsis) - if number_ellipses == 0: - return indexes - if number_ellipses == 1: - index = indexes.index(Ellipsis) - additional = rank + 1 - len(indexes) - if additional < 0: - raise IndexError("too many indices for histogram") - - # Fill out the ellipsis with empty slices - return indexes[:index] + [slice(None)] * additional + indexes[index + 1 :] - - raise IndexError("an index can only have a single ellipsis ('...')") - - -H = TypeVar("H", bound="Histogram") - - -# We currently do not cast *to* a histogram, but this is consistent -# and could be used later. -@register(_histograms) # type: ignore[arg-type] -@set_module("boost_histogram") -class Histogram: - # Note this is a __slots__ __dict__ class! - __slots__ = ( - "__dict__", - "_hist", - "axes", - ) - # .metadata and ._variance_known are part of the dict - - _family: ClassVar[object] = boost_histogram - - axes: AxesTuple - _hist: CppHistogram - _variance_known: bool - - def __init_subclass__(cls, *, family: object | None = None) -> None: - """ - Sets the family for the histogram. This should be a unique object (such - as the main module of your package) that is consistently set across all - subclasses. When converting back from C++, casting will try to always - pick the best matching family from the loaded subclasses for Axis and - such. - """ - super().__init_subclass__() - cls._family = family if family is not None else object() - - @typing.overload - def __init__(self, *args: Histogram) -> None: ... - - @typing.overload - def __init__(self, *args: CppHistogram, metadata: Any = ...) -> None: ... - - @typing.overload - def __init__( - self, - *axes: Axis | CppAxis, - storage: Storage = ..., - metadata: Any = ..., - ) -> None: ... - - def __init__( - self, - *axes: Axis | CppAxis | Histogram | CppHistogram, - storage: Storage = Double(), # noqa: B008 - metadata: Any = None, - ) -> None: - """ - Construct a new histogram. - - If you pass in a single argument, this will be treated as a - histogram and this will convert the histogram to this type of - histogram. - - Parameters - ---------- - *args : Axis - Provide 1 or more axis instances. - storage : Storage = bh.storage.Double() - Select a storage to use in the histogram - metadata : Any = None - Data that is passed along if a new histogram is created - """ - self._variance_known = True - - # Allow construction from a raw histogram object (internal) - if len(axes) == 1 and isinstance(axes[0], tuple(_histograms)): - cpp_hist: CppHistogram = axes[0] # type: ignore[assignment] - self._from_histogram_cpp(cpp_hist) - if metadata: - self.metadata = metadata - return - - # If we construct with another Histogram as the only positional argument, - # support that too - if len(axes) == 1 and isinstance(axes[0], Histogram): - normal_hist: Histogram = axes[0] - self._from_histogram_object(normal_hist) - if metadata: - self.metadata = metadata - return - - # Support objects that provide a to_boost method, like Uproot - if len(axes) == 1 and hasattr(axes[0], "_to_boost_histogram_"): - self._from_histogram_object(axes[0]._to_boost_histogram_()) - return - - if storage is None: - storage = Double() # type: ignore[unreachable] - - self.metadata = metadata - - # Check for missed parenthesis or incorrect types - if not isinstance(storage, Storage): - msg_storage = ( # type: ignore[unreachable] - "Passing in an initialized storage has been removed. Please add ()." - ) - msg_unknown = "Only storages allowed in storage argument" - raise KeyError(msg_storage if issubclass(storage, Storage) else msg_unknown) - - # Allow a tuple to represent a regular axis - axes = tuple(_arg_shortcut(arg) for arg in axes) # type: ignore[arg-type] - - if len(axes) > _core.hist._axes_limit: - msg = f"Too many axes, must be less than {_core.hist._axes_limit}" - raise IndexError(msg) - - # Check all available histograms, and if the storage matches, return that one - for h in _histograms: - if isinstance(storage, h._storage_type): - self._hist = h(axes, storage) # type: ignore[arg-type] - self.axes = self._generate_axes_() - return - - raise TypeError("Unsupported storage") - - @classmethod - def _clone( - cls: type[H], - _hist: Histogram | CppHistogram, - *, - other: Histogram | None = None, - memo: Any = NOTHING, - ) -> H: - """ - Clone a histogram (possibly of a different base). Does not trigger __init__. - This will copy data from `other=` if non-None, otherwise metadata gets copied from the input. - """ - - self = cls.__new__(cls) - if isinstance(_hist, tuple(_histograms)): - self._from_histogram_cpp(_hist) # type: ignore[arg-type] - if other is not None: - return cls._clone(self, other=other, memo=memo) - return self - - assert isinstance(_hist, Histogram) - - if other is None: - other = _hist - - self._from_histogram_object(_hist) - - if memo is NOTHING: - self.__dict__ = copy.copy(other.__dict__) - else: - self.__dict__ = copy.deepcopy(other.__dict__, memo) - - for ax in self.axes: - if memo is NOTHING: - ax.__dict__ = copy.copy(ax._ax.metadata) - else: - ax.__dict__ = copy.deepcopy(ax._ax.metadata, memo) - return self - - def _new_hist(self: H, _hist: CppHistogram, memo: Any = NOTHING) -> H: - """ - Return a new histogram given a new _hist, copying current metadata. - """ - return self.__class__._clone(_hist, other=self, memo=memo) - - def _from_histogram_cpp(self, other: CppHistogram) -> None: - """ - Import a Cpp histogram. - """ - self._variance_known = True - self._hist = other - self.metadata = None - self.axes = self._generate_axes_() - - def _from_histogram_object(self, other: Histogram) -> None: - """ - Convert self into a new histogram object based on another, possibly - converting from a different subclass. - """ - self._hist = other._hist - self.__dict__ = copy.copy(other.__dict__) - self.axes = self._generate_axes_() - for ax in self.axes: - ax.__dict__ = copy.copy(ax._ax.metadata) - - # Allow custom behavior on either "from" or "to" - other._export_bh_(self) - self._import_bh_() - - def _import_bh_(self) -> None: - """ - If any post-processing is needed to pass a histogram between libraries, a - subclass can implement it here. self is the new instance in the current - (converted-to) class. - """ - - @classmethod - def _export_bh_(cls, self: Histogram) -> None: - """ - If any preparation is needed to pass a histogram between libraries, a subclass can - implement it here. cls is the current class being converted from, and self is the - instance in the class being converted to. - """ - - def _generate_axes_(self) -> AxesTuple: - """ - This is called to fill in the axes. Subclasses can override it if they need - to change the axes tuple. - """ - - return AxesTuple(self._axis(i) for i in range(self.ndim)) - - @property - def ndim(self) -> int: - """ - Number of axes (dimensions) of the histogram. - """ - return self._hist.rank() - - def view( - self, flow: bool = False - ) -> np.typing.NDArray[Any] | WeightedSumView | WeightedMeanView | MeanView: - """ - Return a view into the data, optionally with overflow turned on. - """ - return _to_view(self._hist.view(flow)) - - def __array__( - self, - dtype: np.typing.DTypeLike | None = None, - *, - # pylint: disable-next=redefined-outer-name - copy: bool | None = None, - ) -> np.typing.NDArray[Any]: - # The copy kw is new in NumPy 2.0 - kwargs = {} - if copy is not None: - kwargs["copy"] = copy - return np.asarray(self.view(False), dtype=dtype, **kwargs) # type: ignore[call-overload] - - def __eq__(self, other: Any) -> bool: - return hasattr(other, "_hist") and self._hist == other._hist - - def __ne__(self, other: Any) -> bool: - return (not hasattr(other, "_hist")) or self._hist != other._hist - - def __add__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - result = self.copy(deep=False) - return result.__iadd__(other) - - def __iadd__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - if isinstance(other, (int, float)) and other == 0: - return self - self._compute_inplace_op("__iadd__", other) - - # Addition may change the axes if they can grow - self.axes = self._generate_axes_() - - return self - - def __radd__(self: H, other: np.typing.NDArray[Any] | float) -> H: - return self + other - - def __sub__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - result = self.copy(deep=False) - return result.__isub__(other) - - def __isub__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - if isinstance(other, (int, float)) and other == 0: - return self - self._compute_inplace_op("__isub__", other) - - self.axes = self._generate_axes_() - - return self - - # If these fail, the underlying object throws the correct error - def __mul__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - result = self.copy(deep=False) - return result._compute_inplace_op("__imul__", other) - - def __rmul__(self: H, other: np.typing.NDArray[Any] | float) -> H: - return self * other - - def __truediv__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - result = self.copy(deep=False) - return result._compute_inplace_op("__itruediv__", other) - - def __div__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - result = self.copy(deep=False) - return result._compute_inplace_op("__idiv__", other) - - def __idiv__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - return self._compute_inplace_op("__idiv__", other) - - def __itruediv__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - return self._compute_inplace_op("__itruediv__", other) - - def __imul__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - return self._compute_inplace_op("__imul__", other) - - def _compute_inplace_op( - self: H, name: str, other: Histogram | np.typing.NDArray[Any] | float - ) -> H: - # Also takes CppHistogram, but that confuses mypy because it's hard to pick out - if isinstance(other, Histogram): - getattr(self._hist, name)(other._hist) - elif isinstance(other, tuple(_histograms)): - getattr(self._hist, name)(other) - elif hasattr(other, "shape") and other.shape: - assert not isinstance(other, float) - - if len(other.shape) != self.ndim: - msg = f"Number of dimensions {len(other.shape)} must match histogram {self.ndim}" - raise ValueError(msg) - - if all(a in {b, 1} for a, b in zip(other.shape, self.shape)): - view = self.view(flow=False) - getattr(view, name)(other) - elif all(a in {b, 1} for a, b in zip(other.shape, self.axes.extent)): - view = self.view(flow=True) - getattr(view, name)(other) - else: - msg = f"Wrong shape {other.shape}, expected {self.shape} or {self.axes.extent}" - raise ValueError(msg) - else: - view = self.view(flow=True) - getattr(view, name)(other) - - self._variance_known = False - return self - - # TODO: Marked as too complex by flake8. Should be factored out a bit. - def fill( - self: H, - *args: ArrayLike | str, - weight: ArrayLike | None = None, - sample: ArrayLike | None = None, - threads: int | None = None, - ) -> H: - """ - Insert data into the histogram. - - Parameters - ---------- - *args : Union[Array[float], Array[int], Array[str], float, int, str] - Provide one value or array per dimension. - weight : List[Union[Array[float], Array[int], float, int, str]]] - Provide weights (only if the histogram storage supports it) - sample : List[Union[Array[float], Array[int], Array[str], float, int, str]]] - Provide samples (only if the histogram storage supports it) - threads : Optional[int] - Fill with threads. Defaults to None, which does not activate - threaded filling. Using 0 will automatically pick the number of - available threads (usually two per core). - """ - - if self._hist._storage_type is _core.storage.mean: - mean_storage_sample_check(sample) - - if ( - self._hist._storage_type - not in { - _core.storage.weight, - _core.storage.mean, - _core.storage.weighted_mean, - } - and weight is not None - ): - self._variance_known = False - - # Convert to NumPy arrays - args_ars = _fill_cast(args) - weight_ars = _fill_cast(weight) - sample_ars = _fill_cast(sample) - - if threads == 0: - threads = cpu_count() - - if threads is None or threads == 1: - self._hist.fill(*args_ars, weight=weight_ars, sample=sample_ars) - return self - - if self._hist._storage_type in { - _core.storage.mean, - _core.storage.weighted_mean, - }: - raise RuntimeError("Mean histograms do not support threaded filling") - - data: list[list[np.typing.NDArray[Any]] | list[str]] = [ - np.array_split(a, threads) if not isinstance(a, str) else [a] * threads - for a in args_ars - ] - - weights: list[Any] - if weight is None or np.isscalar(weight): - assert threads is not None - weights = [weight_ars] * threads - else: - weights = np.array_split(weight_ars, threads) - - samples: list[Any] - if sample_ars is None or np.isscalar(sample_ars): - assert threads is not None - samples = [sample_ars] * threads - else: - samples = np.array_split(sample_ars, threads) - - if self._hist._storage_type is _core.storage.atomic_int64: - - def fun( - weight: ArrayLike | None, - sample: ArrayLike | None, - *args: np.typing.NDArray[Any], - ) -> None: - self._hist.fill(*args, weight=weight, sample=sample) - - else: - sum_lock = threading.Lock() - - def fun( - weight: ArrayLike | None, - sample: ArrayLike | None, - *args: np.typing.NDArray[Any], - ) -> None: - local_hist = copy.copy(self._hist) - local_hist.reset() - local_hist.fill(*args, weight=weight, sample=sample) - with sum_lock: - self._hist += local_hist - - thread_list = [ - threading.Thread(target=fun, args=arrays) - for arrays in zip(weights, samples, *data) - ] - - for thread in thread_list: - thread.start() - - for thread in thread_list: - thread.join() - - return self - - def __str__(self) -> str: - """ - A rendering of the histogram is made using ASCII or unicode characters - (whatever is supported by the terminal). What exactly is displayed is - still experimental. Do not rely on any particular rendering. - """ - # TODO check the terminal width and adjust the presentation - # only use for 1D, fall back to repr for ND - if self._hist.rank() != 1: - return repr(self) - s = str(self._hist) - # get rid of first line and last character - return s[s.index("\n") + 1 : -1] - - def _axis(self, i: int = 0) -> Axis: - """ - Get N-th axis. - """ - return cast(self, self._hist.axis(i), Axis) - - @property - def storage_type(self) -> type[Storage]: - return cast(self, self._hist._storage_type, Storage) # type: ignore[return-value] - - @property - def _storage_type(self) -> type[Storage]: - warnings.warn( - "Accessing storage type has changed from _storage_type to storage_type, and will be removed in future.", - DeprecationWarning, - stacklevel=2, - ) - return cast(self, self._hist._storage_type, Storage) # type: ignore[return-value] - - def _reduce(self: H, *args: Any) -> H: - return self._new_hist(self._hist.reduce(*args)) - - def __copy__(self: H) -> H: - return self._new_hist(copy.copy(self._hist)) - - def __deepcopy__(self: H, memo: Any) -> H: - return self._new_hist(copy.deepcopy(self._hist), memo=memo) - - def __getstate__(self) -> tuple[int, dict[str, Any]]: - """ - Version 0.8: metadata added - Version 0.11: version added and set to 0. metadata/_hist replaced with dict. - Version 0.12: _variance_known is now in the dict (no format change) - - ``dict`` contains __dict__ with added "_hist" - """ - local_dict = copy.copy(self.__dict__) - local_dict["_hist"] = self._hist - # Version 0 of boost-histogram pickle state - return (0, local_dict) - - def __setstate__(self, state: Any) -> None: - if isinstance(state, tuple): - if state[0] == 0: - for key, value in state[1].items(): - setattr(self, key, value) - - # Added in 0.12 - if "_variance_known" not in state[1]: - self._variance_known = True - else: - msg = f"Cannot open boost-histogram pickle v{state[0]}" - raise RuntimeError(msg) - - else: # Classic (0.10 and before) state - self._hist = state["_hist"] - self._variance_known = True - self.metadata = state.get("metadata", None) - for i in range(self._hist.rank()): - self._hist.axis(i).metadata = {"metadata": self._hist.axis(i).metadata} - - self.axes = self._generate_axes_() - - def __repr__(self) -> str: - newline = "\n " - first_newline = newline if len(self.axes) > 1 else "" - storage_newline = ( - newline if len(self.axes) > 1 else " " if len(self.axes) > 0 else "" - ) - sep = "," if len(self.axes) > 0 else "" - ret = f"{self.__class__.__name__}({first_newline}" - ret += f",{newline}".join(repr(ax) for ax in self.axes) - ret += f"{sep}{storage_newline}storage={self.storage_type()}" # pylint: disable=not-callable - ret += ")" - outer = self.sum(flow=True) - if outer: - inner = self.sum(flow=False) - ret += f" # Sum: {inner}" - if inner != outer: - ret += f" ({outer} with flow)" - return ret - - def _compute_uhi_index(self, index: InnerIndexing, axis: int) -> SimpleIndexing: - """ - Converts an expression that contains UHI locators to one that does not. - """ - # Support sum and rebin directly - if index is sum or hasattr(index, "factor"): # type: ignore[comparison-overlap] - return slice(None, None, index) - - # General locators - # Note that MyPy doesn't like these very much - the fix - # will be to properly set input types - if callable(index): - return index(self.axes[axis]) - - if isinstance(index, float): - raise TypeError(f"Index {index} must be an integer, not float") - - if isinstance(index, SupportsIndex): - if abs(int(index)) >= self._hist.axis(axis).size: - raise IndexError("histogram index is out of range") - return int(index) % self._hist.axis(axis).size - - return index - - def _compute_commonindex( - self, index: IndexingExpr - ) -> list[SupportsIndex | slice | Mapping[int, SupportsIndex | slice]]: - """ - Takes indices and returns two iterables; one is a tuple or dict of the - original, Ellipsis expanded index, and the other returns index, - operation value pairs. - """ - indexes: list[Any] - - # Shorten the computations with direct access to raw object - hist = self._hist - - # Support dict access - if hasattr(index, "items"): - indexes = [slice(None)] * hist.rank() - for k, v in index.items(): - indexes[k] = v - - # Normalize -> h[i] == h[i,] - else: - tuple_index = (index,) if not isinstance(index, tuple) else index - - # Now a list - indexes = _expand_ellipsis(tuple_index, hist.rank()) - - if len(indexes) != hist.rank(): - raise IndexError("Wrong number of indices for histogram") - - # Allow [bh.loc(...)] to work - # TODO: could be nicer making a new list via a comprehension - for i in range(len(indexes)): # pylint: disable=consider-using-enumerate - # Support list of UHI indexers - if isinstance(indexes[i], list): - indexes[i] = [self._compute_uhi_index(ind, i) for ind in indexes[i]] - else: - indexes[i] = self._compute_uhi_index(indexes[i], i) - - return indexes - - def to_numpy( - self, flow: bool = False, *, dd: bool = False, view: bool = False - ) -> ( - tuple[np.typing.NDArray[Any], ...] - | tuple[np.typing.NDArray[Any], tuple[np.typing.NDArray[Any], ...]] - ): - """ - Convert to a NumPy style tuple of return arrays. Edges are converted to - match NumPy standards, with upper edge inclusive, unlike - boost-histogram, where upper edge is exclusive. - - Parameters - ---------- - flow : bool = False - Include the flow bins. - dd : bool = False - Use the histogramdd return syntax, where the edges are in a tuple. - Otherwise, this is the histogram/histogram2d return style. - view : bool = False - The behavior for the return value. By default, this will return - array of the values only regardless of the storage (which is all - NumPy's histogram function can do). view=True will return the - boost-histogram view of the storage. - - Return - ------ - contents : Array[Any] - The bin contents - *edges : Array[float] - The edges for each dimension - """ - - hist, *edges = self._hist.to_numpy(flow) - hist = self.view(flow=flow) if view else self.values(flow=flow) - - return (hist, edges) if dd else (hist, *edges) - - def copy(self: H, *, deep: bool = True) -> H: - """ - Make a copy of the histogram. Defaults to making a - deep copy (axis metadata copied); use deep=False - to avoid making a copy of axis metadata. - """ - - return copy.deepcopy(self) if deep else copy.copy(self) - - def reset(self: H) -> H: - """ - Clear the bin counters. - """ - self._hist.reset() - return self - - def empty(self, flow: bool = False) -> bool: - """ - Check to see if the histogram has any non-default values. - You can use flow=True to check flow bins too. - """ - return self._hist.empty(flow) - - def sum(self, flow: bool = False) -> float | Accumulator: - """ - Compute the sum over the histogram bins (optionally including the flow bins). - """ - return self._hist.sum(flow) # type: ignore[no-any-return] - - @property - def size(self) -> int: - """ - Total number of bins in the histogram (including underflow/overflow). - """ - return self._hist.size() - - @property - def shape(self) -> tuple[int, ...]: - """ - Tuple of axis sizes (not including underflow/overflow). - """ - return self.axes.size - - # TODO: Marked as too complex by flake8. Should be factored out a bit. - def __getitem__(self: H, index: IndexingExpr) -> H | float | Accumulator: - indexes = self._compute_commonindex(index) - - # If this is (now) all integers, return the bin contents - # But don't try *dict! - if not hasattr(indexes, "items") and all( - isinstance(a, SupportsIndex) for a in indexes - ): - return self._hist.at(*indexes) # type: ignore[no-any-return, arg-type] - - integrations: set[int] = set() - slices: list[_core.algorithm.reduce_command] = [] - pick_each: dict[int, int] = {} - pick_set: dict[int, list[int]] = {} - reduced: CppHistogram | None = None - - # Compute needed slices and projections - for i, ind in enumerate(indexes): - if isinstance(ind, SupportsIndex): - pick_each[i] = ind.__index__() + ( - 1 if self.axes[i].traits.underflow else 0 - ) - continue - - if isinstance(ind, collections.abc.Sequence): - pick_set[i] = list(ind) - continue - - if not isinstance(ind, slice): - raise IndexError( - "Must be a slice, an integer, or follow the locator protocol." - ) - - # If the dictionary brackets are forgotten, it's easy to put a slice - # into a slice - adding a nicer error message in that case - if any(isinstance(v, slice) for v in (ind.start, ind.stop, ind.step)): - raise TypeError( - "You have put a slice in a slice. Did you forget curly braces [{...}]?" - ) - - # This ensures that callable start/stop are handled - start, stop = self.axes[i]._process_loc(ind.start, ind.stop) - - groups = [] - if ind != slice(None): - merge = 1 - if ind.step is not None: - if getattr(ind.step, "factor", None) is not None: - merge = ind.step.factor - elif ( - hasattr(ind.step, "group_mapping") - and (tmp_groups := ind.step.group_mapping(self.axes[i])) - is not None - ): - groups = tmp_groups - elif callable(ind.step): - if ind.step is sum: - integrations.add(i) - else: - raise NotImplementedError - - if ind.start is not None or ind.stop is not None: - slices.append( - _core.algorithm.slice( - i, start, stop, _core.algorithm.slice_mode.crop - ) - ) - if len(groups) == 0: - continue - else: - raise IndexError( - "The third argument to a slice must be rebin or projection" - ) - - assert isinstance(start, int) - assert isinstance(stop, int) - # rebinning with factor - if len(groups) == 0: - slices.append( - _core.algorithm.slice_and_rebin(i, start, stop, merge) - ) - # rebinning with groups - elif len(groups) != 0: - if not reduced: - reduced = self._hist - axes = [reduced.axis(x) for x in range(reduced.rank())] - reduced_view = reduced.view(flow=True) - new_axes_indices = [axes[i].edges[0]] - - j = 0 - for group in groups: - new_axes_indices += [axes[i].edges[j + group]] - j += group - - variable_axis = Variable( - new_axes_indices, metadata=axes[i].metadata - ) - axes[i] = variable_axis._ax - - logger.debug("Axes: %s", axes) - - new_reduced = reduced.__class__(axes) - new_view = new_reduced.view(flow=True) - - j = 1 - for new_j, group in enumerate(groups): - for _ in range(group): - pos = [slice(None)] * (i) - new_view[(*pos, new_j + 1, ...)] += _to_view( - reduced_view[(*pos, j, ...)] - ) - j += 1 - - reduced = new_reduced - - # Will be updated below - if (slices or pick_set or pick_each or integrations) and not reduced: - reduced = self._hist - elif not reduced: - reduced = copy.copy(self._hist) - - if pick_each: - tuple_slice = tuple( - pick_each.get(i, slice(None)) for i in range(reduced.rank()) - ) - logger.debug("Slices for pick each: %s", tuple_slice) - axes = [ - reduced.axis(i) for i in range(reduced.rank()) if i not in pick_each - ] - logger.debug("Axes: %s", axes) - new_reduced = reduced.__class__(axes) - new_reduced.view(flow=True)[...] = reduced.view(flow=True)[tuple_slice] - reduced = new_reduced - integrations = {i - sum(j <= i for j in pick_each) for i in integrations} - pick_set = { - i - sum(j <= i for j in pick_each): v for i, v in pick_set.items() - } - for slice_ in slices: - slice_.iaxis -= sum(j <= slice_.iaxis for j in pick_each) - - if slices: - logger.debug("Reduce with %s", slices) - reduced = reduced.reduce(*slices) - - if pick_set: - warnings.warn( - "List indexing selection is experimental. Removed bins are not placed in overflow.", - stacklevel=2, - ) - logger.debug("Slices for picking sets: %s", pick_set) - axes = [reduced.axis(i) for i in range(reduced.rank())] - reduced_view = reduced.view(flow=True) - for i in pick_set: # pylint: disable=consider-using-dict-items - selection = copy.copy(pick_set[i]) - ax = reduced.axis(i) - if ax.traits_ordered: - msg = f"Axis {i} is not a categorical axis, cannot pick with list: {ax}" - raise RuntimeError(msg) - - if ax.traits_overflow and ax.size not in pick_set[i]: - selection.append(ax.size) - - new_axis = axes[i].__class__([axes[i].value(j) for j in pick_set[i]]) # type: ignore[call-arg] - new_axis.metadata = axes[i].metadata - axes[i] = new_axis - reduced_view = np.take(reduced_view, selection, axis=i) - - logger.debug("Axes: %s", axes) - new_reduced = reduced.__class__(axes) - new_reduced.view(flow=True)[...] = reduced_view - reduced = new_reduced - - if integrations: - projections = [i for i in range(reduced.rank()) if i not in integrations] - reduced = reduced.project(*projections) - - return self._new_hist(reduced) if reduced.rank() > 0 else reduced.sum(flow=True) - - def __setitem__(self, index: IndexingExpr, value: ArrayLike | Accumulator) -> None: - """ - There are several supported possibilities: - - h[slice] = array # same size - - If an array is given to a compatible slice, it is set. - - h[a:] = array # One larger - - If an array is given that does not match, if it does match the - with-overflow size, it fills that. - - PLANNED (not yet supported): - - h[a:] = h2 - - If another histogram is given, that must either match with or without - overflow, where the overflow bins must be overflow bins (that is, - you cannot set a histogram's flow bins from another histogram that - is 2 larger). Bin edges must be a close match, as well. If you don't - want this level of type safety, just use ``h[...] = h2.view()``. - """ - indexes = self._compute_commonindex(index) - - if isinstance(value, Histogram): - raise TypeError("Not supported yet") - - value = np.asarray(value) - view = self.view(flow=True) - - value_shape: tuple[int, ...] - # Support raw arrays for accumulators, the final dimension is the constructor values - if ( - value.ndim > 0 - and len(view.dtype) > 0 - and len(value.dtype) == 0 - and len(view.dtype) == value.shape[-1] - ): - value_shape = value.shape[:-1] - value_ndim = value.ndim - 1 - else: - value_shape = value.shape - value_ndim = value.ndim - - # NumPy does not broadcast partial slices, but we would need - # to allow it (because we do allow broadcasting up dimensions) - # Instead, we simply require matching dimensions. - if value_ndim > 0 and value_ndim != sum(isinstance(i, slice) for i in indexes): - msg = f"Setting a {len(indexes)}D histogram with a {value_ndim}D array must have a matching number of dimensions" - raise ValueError(msg) - - # Here, value_n does not increment with n if this is not a slice - value_n = 0 - for n, request in enumerate(indexes): - has_underflow = self.axes[n].traits.underflow - has_overflow = self.axes[n].traits.overflow - - if isinstance(request, slice): - # Only consider underflow/overflow if the endpoints are not given - use_underflow = has_underflow and request.start is None - use_overflow = has_overflow and request.stop is None - - # Make the limits explicit since we may need to shift them - start = 0 if request.start is None else request.start - stop = len(self.axes[n]) if request.stop is None else request.stop - request_len = stop - start - - # If set to a scalar, then treat it like broadcasting without flow bins - if value_ndim == 0: - start = 0 + has_overflow - stop = len(self.axes[n]) + has_underflow - - # Normal setting - elif request_len == value_shape[value_n]: - start += has_underflow - stop += has_underflow - - # Expanded setting - elif request_len + use_underflow + use_overflow == value_shape[value_n]: - start += has_underflow and not use_underflow - stop += has_underflow + (has_overflow and use_overflow) - - # Single element broadcasting - elif value_shape[value_n] == 1: - start += has_underflow - stop += has_underflow - - else: - msg = f"Mismatched shapes in dimension {n}" - msg += f", {value_shape[n]} != {request_len}" - if use_underflow or use_overflow: - msg += f" or {request_len + use_underflow + use_overflow}" - raise ValueError(msg) - indexes[n] = slice(start, stop, request.step) - value_n += 1 - else: - indexes[n] = request + has_underflow - - view[tuple(indexes)] = value # type: ignore[arg-type] - - def project(self: H, *args: int) -> H | float | Accumulator: - """ - Project to a single axis or several axes on a multidimensional histogram. - Provided a list of axis numbers, this will produce the histogram over - those axes only. Flow bins are used if available. - """ - for arg in args: - if arg < 0 or arg >= self.ndim: - raise ValueError( - f"Projection axis must be a valid axis number 0 to {self.ndim - 1}, not {arg}" - ) - - return self._new_hist(self._hist.project(*args)) - - # Implementation of PlottableHistogram - - @property - def kind(self) -> Kind: - """ - Returns Kind.COUNT if this is a normal summing histogram, and Kind.MEAN if this is a - mean histogram. - - :return: Kind - """ - mean = self._hist._storage_type in { - _core.storage.mean, - _core.storage.weighted_mean, - } - - return Kind.MEAN if mean else Kind.COUNT - - def values(self, flow: bool = False) -> np.typing.NDArray[Any]: - """ - Returns the accumulated values. The counts for simple histograms, the - sum of weights for weighted histograms, the mean for profiles, etc. - - If counts is equal to 0, the value in that cell is undefined if - kind == "MEAN". - - :param flow: Enable flow bins. Not part of PlottableHistogram, but - included for consistency with other methods and flexibility. - - :return: "np.typing.NDArray[Any]"[np.float64] - """ - - view = self.view(flow) - # TODO: Might be a NumPy typing bug - if len(view.dtype) == 0: - return view - return view.value - - def variances(self, flow: bool = False) -> np.typing.NDArray[Any] | None: - """ - Returns the estimated variance of the accumulated values. The sum of squared - weights for weighted histograms, the variance of samples for profiles, etc. - For an unweighed histogram where kind == "COUNT", this should return the same - as values if the histogram was not filled with weights, and None otherwise. - If counts is equal to 1 or less, the variance in that cell is undefined if - kind == "MEAN". This must be written <= 1, and not < 2; when this - effective counts (weighed mean), then counts could be less than 2 but - more than 1. - - If kind == "MEAN", the counts can be used to compute the error on the mean - as sqrt(variances / counts), this works whether or not the entries are - weighted if the weight variance was tracked by the implementation. - - Currently, this always returns - but in the future, it will return None - if a weighted fill is made on a unweighed storage. - - :param flow: Enable flow bins. Not part of PlottableHistogram, but - included for consistency with other methods and flexibility. - - :return: "np.typing.NDArray[Any]"[np.float64] - """ - - view = self.view(flow) - if len(view.dtype) == 0: - return view if self._variance_known else None - - if hasattr(view, "sum_of_weights"): - valid = view.sum_of_weights**2 > view.sum_of_weights_squared # type: ignore[union-attr] - return np.divide( - view.variance, - view.sum_of_weights, - out=np.full(view.sum_of_weights.shape, np.nan), - where=valid, - ) - - if hasattr(view, "count"): - return np.divide( - view.variance, - view.count, - out=np.full(view.count.shape, np.nan), - where=view.count > 1, - ) - - return view.variance - - def counts(self, flow: bool = False) -> np.typing.NDArray[Any]: - """ - Returns the number of entries in each bin for an unweighted - histogram or profile and an effective number of entries (defined below) - for a weighted histogram or profile. An exotic generalized histogram could - have no sensible .counts, so this is Optional and should be checked by - Consumers. - - If kind == "MEAN", counts (effective or not) can and should be used to - determine whether the mean value and its variance should be displayed - (see documentation of values and variances, respectively). The counts - should also be used to compute the error on the mean (see documentation - of variances). - - For a weighted histogram, counts is defined as sum_of_weights ** 2 / - sum_of_weights_squared. It is equal or less than the number of times - the bin was filled, the equality holds when all filled weights are equal. - The larger the spread in weights, the smaller it is, but it is always 0 - if filled 0 times, and 1 if filled once, and more than 1 otherwise. - - :return: "np.typing.NDArray[Any]"[np.float64] - """ - - view = self.view(flow) - - if len(view.dtype) == 0: - return view - - if hasattr(view, "sum_of_weights"): - return np.divide( - view.sum_of_weights**2, - view.sum_of_weights_squared, # type: ignore[union-attr] - out=np.zeros_like(view.sum_of_weights, dtype=np.float64), - where=view.sum_of_weights_squared != 0, # type: ignore[union-attr] - ) - - if hasattr(view, "count"): - return view.count - - return view.value - - -if TYPE_CHECKING: - from uhi.typing.plottable import PlottableHistogram - - _: PlottableHistogram = typing.cast(Histogram, None) diff --git a/src/boost_histogram/_utils.py b/src/boost_histogram/_utils.py index 92ebd7c4..fbbbafbf 100644 --- a/src/boost_histogram/_utils.py +++ b/src/boost_histogram/_utils.py @@ -19,19 +19,6 @@ class HasFamily(Protocol): T = TypeVar("T") -def set_module(name: str) -> Callable[[type[T]], type[T]]: - """ - Set the __module__ attribute on a class. Very - similar to numpy.core.overrides.set_module. - """ - - def add_module(cls: type[T]) -> type[T]: - cls.__module__ = name - return cls - - return add_module - - def register( cpp_types: set[type[object]] | None = None, ) -> Callable[[type[T]], type[T]]: diff --git a/src/boost_histogram/numpy.py b/src/boost_histogram/numpy.py index 827b266d..e4b269b8 100644 --- a/src/boost_histogram/numpy.py +++ b/src/boost_histogram/numpy.py @@ -13,9 +13,9 @@ import numpy as np +from . import Histogram from . import axis as _axis from . import storage as _storage -from ._internal import hist as _hist __all__ = ("histogram", "histogram2d", "histogramdd") @@ -35,11 +35,11 @@ def histogramdd( weights: ArrayLike | None = None, density: bool = False, *, - histogram: None | (type[_hist.Histogram]) = None, + histogram: None | (type[Histogram]) = None, storage: _storage.Storage = _storage.Double(), # noqa: B008 threads: int | None = None, ) -> Any: - cls: type[_hist.Histogram] = _hist.Histogram if histogram is None else histogram + cls: type[Histogram] = Histogram if histogram is None else histogram if normed is not None: raise KeyError( @@ -104,7 +104,7 @@ def histogram2d( weights: ArrayLike | None = None, density: bool = False, *, - histogram: None | (type[_hist.Histogram]) = None, + histogram: None | (type[Histogram]) = None, storage: _storage.Storage = _storage.Double(), # noqa: B008 threads: int | None = None, ) -> Any: @@ -135,7 +135,7 @@ def histogram( weights: ArrayLike | None = None, density: bool = False, *, - histogram: None | (type[_hist.Histogram]) = None, + histogram: None | (type[Histogram]) = None, storage: _storage.Storage | None = None, threads: int | None = None, ) -> Any: From 2b432f6b3e59d7f0cc740bfb92b581eb7404845f Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Wed, 29 Jan 2025 17:14:36 -0500 Subject: [PATCH 11/16] chore: put Histogram back into file Signed-off-by: Henry Schreiner --- docs/api/boost_histogram.axis.rst | 1 + docs/api/boost_histogram.rst | 9 + src/boost_histogram/__init__.py | 1274 +--------------------------- src/boost_histogram/histogram.py | 1285 +++++++++++++++++++++++++++++ 4 files changed, 1296 insertions(+), 1273 deletions(-) create mode 100644 src/boost_histogram/histogram.py diff --git a/docs/api/boost_histogram.axis.rst b/docs/api/boost_histogram.axis.rst index c88aa179..39978d2b 100644 --- a/docs/api/boost_histogram.axis.rst +++ b/docs/api/boost_histogram.axis.rst @@ -14,3 +14,4 @@ boost\_histogram.axis.transform :members: :undoc-members: :show-inheritance: + diff --git a/docs/api/boost_histogram.rst b/docs/api/boost_histogram.rst index 302bb372..79607999 100644 --- a/docs/api/boost_histogram.rst +++ b/docs/api/boost_histogram.rst @@ -18,6 +18,14 @@ boost\_histogram.accumulators :undoc-members: :show-inheritance: +boost\_histogram.histogram +========================== + +.. automodule:: boost_histogram.histogram + :members: + :undoc-members: + :show-inheritance: + boost\_histogram.numpy ====================== @@ -57,3 +65,4 @@ boost\_histogram.view :members: :undoc-members: :show-inheritance: + diff --git a/src/boost_histogram/__init__.py b/src/boost_histogram/__init__.py index 0e80b765..8747d591 100644 --- a/src/boost_histogram/__init__.py +++ b/src/boost_histogram/__init__.py @@ -1,37 +1,7 @@ from __future__ import annotations -import collections.abc -import copy -import logging -import threading -import typing -import warnings -from enum import Enum -from os import cpu_count -from typing import ( - TYPE_CHECKING, - Any, - Callable, - ClassVar, - Iterable, - List, - Mapping, - NewType, - SupportsIndex, - Tuple, - TypeVar, - Union, -) - -import numpy as np - -import boost_histogram -from boost_histogram import _core - from . import accumulators, axis, numpy, storage -from ._utils import cast, register -from .axis import AxesTuple, Axis, Variable -from .storage import Double, Storage +from .histogram import Histogram, IndexingExpr, Kind from .tag import ( # pylint: disable=redefined-builtin loc, overflow, @@ -39,43 +9,10 @@ sum, underflow, ) -from .typing import Accumulator, ArrayLike, CppHistogram -from .view import MeanView, WeightedMeanView, WeightedSumView, _to_view - -if TYPE_CHECKING: - from builtins import ellipsis # pylint: disable-next=import-error from .version import version as __version__ -try: - from . import _core -except ImportError as err: - msg = str(err) - if "_core" not in msg: - raise - - new_msg = "Did you forget to compile boost-histogram? Use CMake or Setuptools to build, see the readme." - total_msg = f"{msg}\n{new_msg}" - - new_exception = type(err)(new_msg, name=err.name, path=err.path) - raise new_exception from err - - -# This is a StrEnum as defined in Python 3.10 -class Kind(str, Enum): - COUNT = "COUNT" - MEAN = "MEAN" - - # This cast + type ignore is really odd, so it deserves a quick - # explanation. If we just set this like StrEnum does, then mypy complains - # that the type is changing (str -> Kind). If we type: ignore, then - # MyPy claims that the type: ignore is not needed. If we cast, we get the - # same error as before. But if we cast and type: ignore, it now works. - # Will report to MyPy. Tested on 0.800. - __str__ = typing.cast(Callable[["Kind"], str], str.__str__) # type: ignore[assignment] - - __all__ = [ "Histogram", "IndexingExpr", @@ -95,1212 +32,3 @@ class Kind(str, Enum): def __dir__() -> list[str]: return __all__ - - -# Support cloudpickle - pybind11 submodules do not have __file__ attributes -# And setting this in C++ causes a segfault -_core.accumulators.__file__ = _core.__file__ -_core.algorithm.__file__ = _core.__file__ -_core.axis.__file__ = _core.__file__ -_core.axis.transform.__file__ = _core.__file__ -_core.hist.__file__ = _core.__file__ -_core.storage.__file__ = _core.__file__ - - -NOTHING = object() - - -_histograms: set[type[CppHistogram]] = { - _core.hist.any_double, - _core.hist.any_int64, - _core.hist.any_atomic_int64, - _core.hist.any_unlimited, - _core.hist.any_weight, - _core.hist.any_mean, - _core.hist.any_weighted_mean, -} - -logger = logging.getLogger(__name__) - - -CppAxis = NewType("CppAxis", object) - -SimpleIndexing = Union[SupportsIndex, slice] -InnerIndexing = Union[SimpleIndexing, Callable[[Axis], int]] -FullInnerIndexing = Union[InnerIndexing, List[InnerIndexing]] -IndexingWithMapping = Union[FullInnerIndexing, Mapping[int, FullInnerIndexing]] -IndexingExpr = Union[IndexingWithMapping, Tuple[IndexingWithMapping, ...], "ellipsis"] - -T = TypeVar("T") - - -def _fill_cast( - value: T, *, inner: bool = False -) -> T | np.typing.NDArray[Any] | tuple[T, ...]: - """ - Convert to NumPy arrays. Some buffer objects do not get converted by forcecast. - If not called by itself (inner=False), then will work through one level of tuple/list. - """ - if value is None or isinstance(value, (str, bytes)): - return value - - if not inner and isinstance(value, (tuple, list)): - return tuple(_fill_cast(a, inner=True) for a in value) - - if hasattr(value, "__iter__") or hasattr(value, "__array__"): - return np.asarray(value) - - return value - - -def mean_storage_sample_check(sample: ArrayLike | None) -> None: - if sample is None: - raise TypeError("Sample key-argument (sample=) needs to be provided.") - seqs = (collections.abc.Sequence, np.ndarray) - msg1 = f"Sample key-argument needs to be a sequence, {sample.__class__.__name__} given." - if isinstance(sample, str) and not isinstance(sample, seqs): - raise ValueError(msg1) - sample_dim = np.array(sample).ndim - msg2 = f"Sample key-argument needs to be 1 dimensional, {sample_dim} given." - if sample_dim != 1: - raise ValueError(msg2) - - -def _arg_shortcut(item: tuple[int, float, float] | Axis | CppAxis) -> CppAxis: - if isinstance(item, tuple) and len(item) == 3: - msg = "Using () directly in constructor is a developer shortcut and will be removed in a future version" - warnings.warn(msg, FutureWarning, stacklevel=4) - return _core.axis.regular_uoflow(item[0], item[1], item[2]) # type: ignore[return-value] - - if isinstance(item, Axis): - return item._ax # type: ignore[no-any-return] - - raise TypeError("Only axes supported in histogram constructor") - - -def _expand_ellipsis(indexes: Iterable[Any], rank: int) -> list[Any]: - indexes = list(indexes) - number_ellipses = indexes.count(Ellipsis) - if number_ellipses == 0: - return indexes - if number_ellipses == 1: - index = indexes.index(Ellipsis) - additional = rank + 1 - len(indexes) - if additional < 0: - raise IndexError("too many indices for histogram") - - # Fill out the ellipsis with empty slices - return indexes[:index] + [slice(None)] * additional + indexes[index + 1 :] - - raise IndexError("an index can only have a single ellipsis ('...')") - - -H = TypeVar("H", bound="Histogram") - - -# We currently do not cast *to* a histogram, but this is consistent -# and could be used later. -@register(_histograms) # type: ignore[arg-type] -class Histogram: - # Note this is a __slots__ __dict__ class! - __slots__ = ( - "__dict__", - "_hist", - "axes", - ) - # .metadata and ._variance_known are part of the dict - - _family: ClassVar[object] = boost_histogram - - axes: AxesTuple - _hist: CppHistogram - _variance_known: bool - - def __init_subclass__(cls, *, family: object | None = None) -> None: - """ - Sets the family for the histogram. This should be a unique object (such - as the main module of your package) that is consistently set across all - subclasses. When converting back from C++, casting will try to always - pick the best matching family from the loaded subclasses for Axis and - such. - """ - super().__init_subclass__() - cls._family = family if family is not None else object() - - @typing.overload - def __init__(self, *args: Histogram) -> None: ... - - @typing.overload - def __init__(self, *args: CppHistogram, metadata: Any = ...) -> None: ... - - @typing.overload - def __init__( - self, - *axes: Axis | CppAxis, - storage: Storage = ..., - metadata: Any = ..., - ) -> None: ... - - def __init__( - self, - *axes: Axis | CppAxis | Histogram | CppHistogram, - storage: Storage = Double(), # noqa: B008 - metadata: Any = None, - ) -> None: - """ - Construct a new histogram. - - If you pass in a single argument, this will be treated as a - histogram and this will convert the histogram to this type of - histogram. - - Parameters - ---------- - *args : Axis - Provide 1 or more axis instances. - storage : Storage = bh.storage.Double() - Select a storage to use in the histogram - metadata : Any = None - Data that is passed along if a new histogram is created - """ - self._variance_known = True - - # Allow construction from a raw histogram object (internal) - if len(axes) == 1 and isinstance(axes[0], tuple(_histograms)): - cpp_hist: CppHistogram = axes[0] # type: ignore[assignment] - self._from_histogram_cpp(cpp_hist) - if metadata: - self.metadata = metadata - return - - # If we construct with another Histogram as the only positional argument, - # support that too - if len(axes) == 1 and isinstance(axes[0], Histogram): - normal_hist: Histogram = axes[0] - self._from_histogram_object(normal_hist) - if metadata: - self.metadata = metadata - return - - # Support objects that provide a to_boost method, like Uproot - if len(axes) == 1 and hasattr(axes[0], "_to_boost_histogram_"): - self._from_histogram_object(axes[0]._to_boost_histogram_()) - return - - if storage is None: - storage = Double() # type: ignore[unreachable] - - self.metadata = metadata - - # Check for missed parenthesis or incorrect types - if not isinstance(storage, Storage): - msg_storage = ( # type: ignore[unreachable] - "Passing in an initialized storage has been removed. Please add ()." - ) - msg_unknown = "Only storages allowed in storage argument" - raise KeyError(msg_storage if issubclass(storage, Storage) else msg_unknown) - - # Allow a tuple to represent a regular axis - axes = tuple(_arg_shortcut(arg) for arg in axes) # type: ignore[arg-type] - - if len(axes) > _core.hist._axes_limit: - msg = f"Too many axes, must be less than {_core.hist._axes_limit}" - raise IndexError(msg) - - # Check all available histograms, and if the storage matches, return that one - for h in _histograms: - if isinstance(storage, h._storage_type): - self._hist = h(axes, storage) # type: ignore[arg-type] - self.axes = self._generate_axes_() - return - - raise TypeError("Unsupported storage") - - @classmethod - def _clone( - cls: type[H], - _hist: Histogram | CppHistogram, - *, - other: Histogram | None = None, - memo: Any = NOTHING, - ) -> H: - """ - Clone a histogram (possibly of a different base). Does not trigger __init__. - This will copy data from `other=` if non-None, otherwise metadata gets copied from the input. - """ - - self = cls.__new__(cls) - if isinstance(_hist, tuple(_histograms)): - self._from_histogram_cpp(_hist) # type: ignore[arg-type] - if other is not None: - return cls._clone(self, other=other, memo=memo) - return self - - assert isinstance(_hist, Histogram) - - if other is None: - other = _hist - - self._from_histogram_object(_hist) - - if memo is NOTHING: - self.__dict__ = copy.copy(other.__dict__) - else: - self.__dict__ = copy.deepcopy(other.__dict__, memo) - - for ax in self.axes: - if memo is NOTHING: - ax.__dict__ = copy.copy(ax._ax.metadata) - else: - ax.__dict__ = copy.deepcopy(ax._ax.metadata, memo) - return self - - def _new_hist(self: H, _hist: CppHistogram, memo: Any = NOTHING) -> H: - """ - Return a new histogram given a new _hist, copying current metadata. - """ - return self.__class__._clone(_hist, other=self, memo=memo) - - def _from_histogram_cpp(self, other: CppHistogram) -> None: - """ - Import a Cpp histogram. - """ - self._variance_known = True - self._hist = other - self.metadata = None - self.axes = self._generate_axes_() - - def _from_histogram_object(self, other: Histogram) -> None: - """ - Convert self into a new histogram object based on another, possibly - converting from a different subclass. - """ - self._hist = other._hist - self.__dict__ = copy.copy(other.__dict__) - self.axes = self._generate_axes_() - for ax in self.axes: - ax.__dict__ = copy.copy(ax._ax.metadata) - - # Allow custom behavior on either "from" or "to" - other._export_bh_(self) - self._import_bh_() - - def _import_bh_(self) -> None: - """ - If any post-processing is needed to pass a histogram between libraries, a - subclass can implement it here. self is the new instance in the current - (converted-to) class. - """ - - @classmethod - def _export_bh_(cls, self: Histogram) -> None: - """ - If any preparation is needed to pass a histogram between libraries, a subclass can - implement it here. cls is the current class being converted from, and self is the - instance in the class being converted to. - """ - - def _generate_axes_(self) -> AxesTuple: - """ - This is called to fill in the axes. Subclasses can override it if they need - to change the axes tuple. - """ - - return AxesTuple(self._axis(i) for i in range(self.ndim)) - - @property - def ndim(self) -> int: - """ - Number of axes (dimensions) of the histogram. - """ - return self._hist.rank() - - def view( - self, flow: bool = False - ) -> np.typing.NDArray[Any] | WeightedSumView | WeightedMeanView | MeanView: - """ - Return a view into the data, optionally with overflow turned on. - """ - return _to_view(self._hist.view(flow)) - - def __array__( - self, - dtype: np.typing.DTypeLike | None = None, - *, - # pylint: disable-next=redefined-outer-name - copy: bool | None = None, - ) -> np.typing.NDArray[Any]: - # The copy kw is new in NumPy 2.0 - kwargs = {} - if copy is not None: - kwargs["copy"] = copy - return np.asarray(self.view(False), dtype=dtype, **kwargs) # type: ignore[call-overload] - - def __eq__(self, other: Any) -> bool: - return hasattr(other, "_hist") and self._hist == other._hist - - def __ne__(self, other: Any) -> bool: - return (not hasattr(other, "_hist")) or self._hist != other._hist - - def __add__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - result = self.copy(deep=False) - return result.__iadd__(other) - - def __iadd__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - if isinstance(other, (int, float)) and other == 0: - return self - self._compute_inplace_op("__iadd__", other) - - # Addition may change the axes if they can grow - self.axes = self._generate_axes_() - - return self - - def __radd__(self: H, other: np.typing.NDArray[Any] | float) -> H: - return self + other - - def __sub__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - result = self.copy(deep=False) - return result.__isub__(other) - - def __isub__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - if isinstance(other, (int, float)) and other == 0: - return self - self._compute_inplace_op("__isub__", other) - - self.axes = self._generate_axes_() - - return self - - # If these fail, the underlying object throws the correct error - def __mul__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - result = self.copy(deep=False) - return result._compute_inplace_op("__imul__", other) - - def __rmul__(self: H, other: np.typing.NDArray[Any] | float) -> H: - return self * other - - def __truediv__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - result = self.copy(deep=False) - return result._compute_inplace_op("__itruediv__", other) - - def __div__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - result = self.copy(deep=False) - return result._compute_inplace_op("__idiv__", other) - - def __idiv__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - return self._compute_inplace_op("__idiv__", other) - - def __itruediv__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - return self._compute_inplace_op("__itruediv__", other) - - def __imul__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: - return self._compute_inplace_op("__imul__", other) - - def _compute_inplace_op( - self: H, name: str, other: Histogram | np.typing.NDArray[Any] | float - ) -> H: - # Also takes CppHistogram, but that confuses mypy because it's hard to pick out - if isinstance(other, Histogram): - getattr(self._hist, name)(other._hist) - elif isinstance(other, tuple(_histograms)): - getattr(self._hist, name)(other) - elif hasattr(other, "shape") and other.shape: - assert not isinstance(other, float) - - if len(other.shape) != self.ndim: - msg = f"Number of dimensions {len(other.shape)} must match histogram {self.ndim}" - raise ValueError(msg) - - if all(a in {b, 1} for a, b in zip(other.shape, self.shape)): - view = self.view(flow=False) - getattr(view, name)(other) - elif all(a in {b, 1} for a, b in zip(other.shape, self.axes.extent)): - view = self.view(flow=True) - getattr(view, name)(other) - else: - msg = f"Wrong shape {other.shape}, expected {self.shape} or {self.axes.extent}" - raise ValueError(msg) - else: - view = self.view(flow=True) - getattr(view, name)(other) - - self._variance_known = False - return self - - # TODO: Marked as too complex by flake8. Should be factored out a bit. - def fill( - self: H, - *args: ArrayLike | str, - weight: ArrayLike | None = None, - sample: ArrayLike | None = None, - threads: int | None = None, - ) -> H: - """ - Insert data into the histogram. - - Parameters - ---------- - *args : Union[Array[float], Array[int], Array[str], float, int, str] - Provide one value or array per dimension. - weight : List[Union[Array[float], Array[int], float, int, str]]] - Provide weights (only if the histogram storage supports it) - sample : List[Union[Array[float], Array[int], Array[str], float, int, str]]] - Provide samples (only if the histogram storage supports it) - threads : Optional[int] - Fill with threads. Defaults to None, which does not activate - threaded filling. Using 0 will automatically pick the number of - available threads (usually two per core). - """ - - if self._hist._storage_type is _core.storage.mean: - mean_storage_sample_check(sample) - - if ( - self._hist._storage_type - not in { - _core.storage.weight, - _core.storage.mean, - _core.storage.weighted_mean, - } - and weight is not None - ): - self._variance_known = False - - # Convert to NumPy arrays - args_ars = _fill_cast(args) - weight_ars = _fill_cast(weight) - sample_ars = _fill_cast(sample) - - if threads == 0: - threads = cpu_count() - - if threads is None or threads == 1: - self._hist.fill(*args_ars, weight=weight_ars, sample=sample_ars) - return self - - if self._hist._storage_type in { - _core.storage.mean, - _core.storage.weighted_mean, - }: - raise RuntimeError("Mean histograms do not support threaded filling") - - data: list[list[np.typing.NDArray[Any]] | list[str]] = [ - np.array_split(a, threads) if not isinstance(a, str) else [a] * threads - for a in args_ars - ] - - weights: list[Any] - if weight is None or np.isscalar(weight): - assert threads is not None - weights = [weight_ars] * threads - else: - weights = np.array_split(weight_ars, threads) - - samples: list[Any] - if sample_ars is None or np.isscalar(sample_ars): - assert threads is not None - samples = [sample_ars] * threads - else: - samples = np.array_split(sample_ars, threads) - - if self._hist._storage_type is _core.storage.atomic_int64: - - def fun( - weight: ArrayLike | None, - sample: ArrayLike | None, - *args: np.typing.NDArray[Any], - ) -> None: - self._hist.fill(*args, weight=weight, sample=sample) - - else: - sum_lock = threading.Lock() - - def fun( - weight: ArrayLike | None, - sample: ArrayLike | None, - *args: np.typing.NDArray[Any], - ) -> None: - local_hist = copy.copy(self._hist) - local_hist.reset() - local_hist.fill(*args, weight=weight, sample=sample) - with sum_lock: - self._hist += local_hist - - thread_list = [ - threading.Thread(target=fun, args=arrays) - for arrays in zip(weights, samples, *data) - ] - - for thread in thread_list: - thread.start() - - for thread in thread_list: - thread.join() - - return self - - def __str__(self) -> str: - """ - A rendering of the histogram is made using ASCII or unicode characters - (whatever is supported by the terminal). What exactly is displayed is - still experimental. Do not rely on any particular rendering. - """ - # TODO check the terminal width and adjust the presentation - # only use for 1D, fall back to repr for ND - if self._hist.rank() != 1: - return repr(self) - s = str(self._hist) - # get rid of first line and last character - return s[s.index("\n") + 1 : -1] - - def _axis(self, i: int = 0) -> Axis: - """ - Get N-th axis. - """ - return cast(self, self._hist.axis(i), Axis) - - @property - def storage_type(self) -> type[Storage]: - return cast(self, self._hist._storage_type, Storage) # type: ignore[return-value] - - @property - def _storage_type(self) -> type[Storage]: - warnings.warn( - "Accessing storage type has changed from _storage_type to storage_type, and will be removed in future.", - DeprecationWarning, - stacklevel=2, - ) - return cast(self, self._hist._storage_type, Storage) # type: ignore[return-value] - - def _reduce(self: H, *args: Any) -> H: - return self._new_hist(self._hist.reduce(*args)) - - def __copy__(self: H) -> H: - return self._new_hist(copy.copy(self._hist)) - - def __deepcopy__(self: H, memo: Any) -> H: - return self._new_hist(copy.deepcopy(self._hist), memo=memo) - - def __getstate__(self) -> tuple[int, dict[str, Any]]: - """ - Version 0.8: metadata added - Version 0.11: version added and set to 0. metadata/_hist replaced with dict. - Version 0.12: _variance_known is now in the dict (no format change) - - ``dict`` contains __dict__ with added "_hist" - """ - local_dict = copy.copy(self.__dict__) - local_dict["_hist"] = self._hist - # Version 0 of boost-histogram pickle state - return (0, local_dict) - - def __setstate__(self, state: Any) -> None: - if isinstance(state, tuple): - if state[0] == 0: - for key, value in state[1].items(): - setattr(self, key, value) - - # Added in 0.12 - if "_variance_known" not in state[1]: - self._variance_known = True - else: - msg = f"Cannot open boost-histogram pickle v{state[0]}" - raise RuntimeError(msg) - - else: # Classic (0.10 and before) state - self._hist = state["_hist"] - self._variance_known = True - self.metadata = state.get("metadata", None) - for i in range(self._hist.rank()): - self._hist.axis(i).metadata = {"metadata": self._hist.axis(i).metadata} - - self.axes = self._generate_axes_() - - def __repr__(self) -> str: - newline = "\n " - first_newline = newline if len(self.axes) > 1 else "" - storage_newline = ( - newline if len(self.axes) > 1 else " " if len(self.axes) > 0 else "" - ) - sep = "," if len(self.axes) > 0 else "" - ret = f"{self.__class__.__name__}({first_newline}" - ret += f",{newline}".join(repr(ax) for ax in self.axes) - ret += f"{sep}{storage_newline}storage={self.storage_type()}" # pylint: disable=not-callable - ret += ")" - outer = self.sum(flow=True) - if outer: - inner = self.sum(flow=False) - ret += f" # Sum: {inner}" - if inner != outer: - ret += f" ({outer} with flow)" - return ret - - def _compute_uhi_index(self, index: InnerIndexing, axis: int) -> SimpleIndexing: - """ - Converts an expression that contains UHI locators to one that does not. - """ - # Support sum and rebin directly - if index is sum or hasattr(index, "factor"): # type: ignore[comparison-overlap] - return slice(None, None, index) - - # General locators - # Note that MyPy doesn't like these very much - the fix - # will be to properly set input types - if callable(index): - return index(self.axes[axis]) - - if isinstance(index, float): - raise TypeError(f"Index {index} must be an integer, not float") - - if isinstance(index, SupportsIndex): - if abs(int(index)) >= self._hist.axis(axis).size: - raise IndexError("histogram index is out of range") - return int(index) % self._hist.axis(axis).size - - return index - - def _compute_commonindex( - self, index: IndexingExpr - ) -> list[SupportsIndex | slice | Mapping[int, SupportsIndex | slice]]: - """ - Takes indices and returns two iterables; one is a tuple or dict of the - original, Ellipsis expanded index, and the other returns index, - operation value pairs. - """ - indexes: list[Any] - - # Shorten the computations with direct access to raw object - hist = self._hist - - # Support dict access - if hasattr(index, "items"): - indexes = [slice(None)] * hist.rank() - for k, v in index.items(): - indexes[k] = v - - # Normalize -> h[i] == h[i,] - else: - tuple_index = (index,) if not isinstance(index, tuple) else index - - # Now a list - indexes = _expand_ellipsis(tuple_index, hist.rank()) - - if len(indexes) != hist.rank(): - raise IndexError("Wrong number of indices for histogram") - - # Allow [bh.loc(...)] to work - # TODO: could be nicer making a new list via a comprehension - for i in range(len(indexes)): # pylint: disable=consider-using-enumerate - # Support list of UHI indexers - if isinstance(indexes[i], list): - indexes[i] = [self._compute_uhi_index(ind, i) for ind in indexes[i]] - else: - indexes[i] = self._compute_uhi_index(indexes[i], i) - - return indexes - - def to_numpy( - self, flow: bool = False, *, dd: bool = False, view: bool = False - ) -> ( - tuple[np.typing.NDArray[Any], ...] - | tuple[np.typing.NDArray[Any], tuple[np.typing.NDArray[Any], ...]] - ): - """ - Convert to a NumPy style tuple of return arrays. Edges are converted to - match NumPy standards, with upper edge inclusive, unlike - boost-histogram, where upper edge is exclusive. - - Parameters - ---------- - flow : bool = False - Include the flow bins. - dd : bool = False - Use the histogramdd return syntax, where the edges are in a tuple. - Otherwise, this is the histogram/histogram2d return style. - view : bool = False - The behavior for the return value. By default, this will return - array of the values only regardless of the storage (which is all - NumPy's histogram function can do). view=True will return the - boost-histogram view of the storage. - - Return - ------ - contents : Array[Any] - The bin contents - *edges : Array[float] - The edges for each dimension - """ - - hist, *edges = self._hist.to_numpy(flow) - hist = self.view(flow=flow) if view else self.values(flow=flow) - - return (hist, edges) if dd else (hist, *edges) - - def copy(self: H, *, deep: bool = True) -> H: - """ - Make a copy of the histogram. Defaults to making a - deep copy (axis metadata copied); use deep=False - to avoid making a copy of axis metadata. - """ - - return copy.deepcopy(self) if deep else copy.copy(self) - - def reset(self: H) -> H: - """ - Clear the bin counters. - """ - self._hist.reset() - return self - - def empty(self, flow: bool = False) -> bool: - """ - Check to see if the histogram has any non-default values. - You can use flow=True to check flow bins too. - """ - return self._hist.empty(flow) - - def sum(self, flow: bool = False) -> float | Accumulator: - """ - Compute the sum over the histogram bins (optionally including the flow bins). - """ - return self._hist.sum(flow) # type: ignore[no-any-return] - - @property - def size(self) -> int: - """ - Total number of bins in the histogram (including underflow/overflow). - """ - return self._hist.size() - - @property - def shape(self) -> tuple[int, ...]: - """ - Tuple of axis sizes (not including underflow/overflow). - """ - return self.axes.size - - # TODO: Marked as too complex by flake8. Should be factored out a bit. - def __getitem__(self: H, index: IndexingExpr) -> H | float | Accumulator: - indexes = self._compute_commonindex(index) - - # If this is (now) all integers, return the bin contents - # But don't try *dict! - if not hasattr(indexes, "items") and all( - isinstance(a, SupportsIndex) for a in indexes - ): - return self._hist.at(*indexes) # type: ignore[no-any-return, arg-type] - - integrations: set[int] = set() - slices: list[_core.algorithm.reduce_command] = [] - pick_each: dict[int, int] = {} - pick_set: dict[int, list[int]] = {} - reduced: CppHistogram | None = None - - # Compute needed slices and projections - for i, ind in enumerate(indexes): - if isinstance(ind, SupportsIndex): - pick_each[i] = ind.__index__() + ( - 1 if self.axes[i].traits.underflow else 0 - ) - continue - - if isinstance(ind, collections.abc.Sequence): - pick_set[i] = list(ind) - continue - - if not isinstance(ind, slice): - raise IndexError( - "Must be a slice, an integer, or follow the locator protocol." - ) - - # If the dictionary brackets are forgotten, it's easy to put a slice - # into a slice - adding a nicer error message in that case - if any(isinstance(v, slice) for v in (ind.start, ind.stop, ind.step)): - raise TypeError( - "You have put a slice in a slice. Did you forget curly braces [{...}]?" - ) - - # This ensures that callable start/stop are handled - start, stop = self.axes[i]._process_loc(ind.start, ind.stop) - - groups = [] - if ind != slice(None): - merge = 1 - if ind.step is not None: - if getattr(ind.step, "factor", None) is not None: - merge = ind.step.factor - elif ( - hasattr(ind.step, "group_mapping") - and (tmp_groups := ind.step.group_mapping(self.axes[i])) - is not None - ): - groups = tmp_groups - elif callable(ind.step): - if ind.step is sum: - integrations.add(i) - else: - raise NotImplementedError - - if ind.start is not None or ind.stop is not None: - slices.append( - _core.algorithm.slice( - i, start, stop, _core.algorithm.slice_mode.crop - ) - ) - if len(groups) == 0: - continue - else: - raise IndexError( - "The third argument to a slice must be rebin or projection" - ) - - assert isinstance(start, int) - assert isinstance(stop, int) - # rebinning with factor - if len(groups) == 0: - slices.append( - _core.algorithm.slice_and_rebin(i, start, stop, merge) - ) - # rebinning with groups - elif len(groups) != 0: - if not reduced: - reduced = self._hist - axes = [reduced.axis(x) for x in range(reduced.rank())] - reduced_view = reduced.view(flow=True) - new_axes_indices = [axes[i].edges[0]] - - j = 0 - for group in groups: - new_axes_indices += [axes[i].edges[j + group]] - j += group - - variable_axis = Variable( - new_axes_indices, metadata=axes[i].metadata - ) - axes[i] = variable_axis._ax - - logger.debug("Axes: %s", axes) - - new_reduced = reduced.__class__(axes) - new_view = new_reduced.view(flow=True) - - j = 1 - for new_j, group in enumerate(groups): - for _ in range(group): - pos = [slice(None)] * (i) - new_view[(*pos, new_j + 1, ...)] += _to_view( - reduced_view[(*pos, j, ...)] - ) - j += 1 - - reduced = new_reduced - - # Will be updated below - if (slices or pick_set or pick_each or integrations) and not reduced: - reduced = self._hist - elif not reduced: - reduced = copy.copy(self._hist) - - if pick_each: - tuple_slice = tuple( - pick_each.get(i, slice(None)) for i in range(reduced.rank()) - ) - logger.debug("Slices for pick each: %s", tuple_slice) - axes = [ - reduced.axis(i) for i in range(reduced.rank()) if i not in pick_each - ] - logger.debug("Axes: %s", axes) - new_reduced = reduced.__class__(axes) - new_reduced.view(flow=True)[...] = reduced.view(flow=True)[tuple_slice] - reduced = new_reduced - integrations = {i - sum(j <= i for j in pick_each) for i in integrations} - pick_set = { - i - sum(j <= i for j in pick_each): v for i, v in pick_set.items() - } - for slice_ in slices: - slice_.iaxis -= sum(j <= slice_.iaxis for j in pick_each) - - if slices: - logger.debug("Reduce with %s", slices) - reduced = reduced.reduce(*slices) - - if pick_set: - warnings.warn( - "List indexing selection is experimental. Removed bins are not placed in overflow.", - stacklevel=2, - ) - logger.debug("Slices for picking sets: %s", pick_set) - axes = [reduced.axis(i) for i in range(reduced.rank())] - reduced_view = reduced.view(flow=True) - for i in pick_set: # pylint: disable=consider-using-dict-items - selection = copy.copy(pick_set[i]) - ax = reduced.axis(i) - if ax.traits_ordered: - msg = f"Axis {i} is not a categorical axis, cannot pick with list: {ax}" - raise RuntimeError(msg) - - if ax.traits_overflow and ax.size not in pick_set[i]: - selection.append(ax.size) - - new_axis = axes[i].__class__([axes[i].value(j) for j in pick_set[i]]) # type: ignore[call-arg] - new_axis.metadata = axes[i].metadata - axes[i] = new_axis - reduced_view = np.take(reduced_view, selection, axis=i) - - logger.debug("Axes: %s", axes) - new_reduced = reduced.__class__(axes) - new_reduced.view(flow=True)[...] = reduced_view - reduced = new_reduced - - if integrations: - projections = [i for i in range(reduced.rank()) if i not in integrations] - reduced = reduced.project(*projections) - - return self._new_hist(reduced) if reduced.rank() > 0 else reduced.sum(flow=True) - - def __setitem__(self, index: IndexingExpr, value: ArrayLike | Accumulator) -> None: - """ - There are several supported possibilities: - - h[slice] = array # same size - - If an array is given to a compatible slice, it is set. - - h[a:] = array # One larger - - If an array is given that does not match, if it does match the - with-overflow size, it fills that. - - PLANNED (not yet supported): - - h[a:] = h2 - - If another histogram is given, that must either match with or without - overflow, where the overflow bins must be overflow bins (that is, - you cannot set a histogram's flow bins from another histogram that - is 2 larger). Bin edges must be a close match, as well. If you don't - want this level of type safety, just use ``h[...] = h2.view()``. - """ - indexes = self._compute_commonindex(index) - - if isinstance(value, Histogram): - raise TypeError("Not supported yet") - - value = np.asarray(value) - view = self.view(flow=True) - - value_shape: tuple[int, ...] - # Support raw arrays for accumulators, the final dimension is the constructor values - if ( - value.ndim > 0 - and len(view.dtype) > 0 - and len(value.dtype) == 0 - and len(view.dtype) == value.shape[-1] - ): - value_shape = value.shape[:-1] - value_ndim = value.ndim - 1 - else: - value_shape = value.shape - value_ndim = value.ndim - - # NumPy does not broadcast partial slices, but we would need - # to allow it (because we do allow broadcasting up dimensions) - # Instead, we simply require matching dimensions. - if value_ndim > 0 and value_ndim != sum(isinstance(i, slice) for i in indexes): - msg = f"Setting a {len(indexes)}D histogram with a {value_ndim}D array must have a matching number of dimensions" - raise ValueError(msg) - - # Here, value_n does not increment with n if this is not a slice - value_n = 0 - for n, request in enumerate(indexes): - has_underflow = self.axes[n].traits.underflow - has_overflow = self.axes[n].traits.overflow - - if isinstance(request, slice): - # Only consider underflow/overflow if the endpoints are not given - use_underflow = has_underflow and request.start is None - use_overflow = has_overflow and request.stop is None - - # Make the limits explicit since we may need to shift them - start = 0 if request.start is None else request.start - stop = len(self.axes[n]) if request.stop is None else request.stop - request_len = stop - start - - # If set to a scalar, then treat it like broadcasting without flow bins - if value_ndim == 0: - start = 0 + has_overflow - stop = len(self.axes[n]) + has_underflow - - # Normal setting - elif request_len == value_shape[value_n]: - start += has_underflow - stop += has_underflow - - # Expanded setting - elif request_len + use_underflow + use_overflow == value_shape[value_n]: - start += has_underflow and not use_underflow - stop += has_underflow + (has_overflow and use_overflow) - - # Single element broadcasting - elif value_shape[value_n] == 1: - start += has_underflow - stop += has_underflow - - else: - msg = f"Mismatched shapes in dimension {n}" - msg += f", {value_shape[n]} != {request_len}" - if use_underflow or use_overflow: - msg += f" or {request_len + use_underflow + use_overflow}" - raise ValueError(msg) - indexes[n] = slice(start, stop, request.step) - value_n += 1 - else: - indexes[n] = request + has_underflow - - view[tuple(indexes)] = value # type: ignore[arg-type] - - def project(self: H, *args: int) -> H | float | Accumulator: - """ - Project to a single axis or several axes on a multidimensional histogram. - Provided a list of axis numbers, this will produce the histogram over - those axes only. Flow bins are used if available. - """ - for arg in args: - if arg < 0 or arg >= self.ndim: - raise ValueError( - f"Projection axis must be a valid axis number 0 to {self.ndim - 1}, not {arg}" - ) - - return self._new_hist(self._hist.project(*args)) - - # Implementation of PlottableHistogram - - @property - def kind(self) -> Kind: - """ - Returns Kind.COUNT if this is a normal summing histogram, and Kind.MEAN if this is a - mean histogram. - - :return: Kind - """ - mean = self._hist._storage_type in { - _core.storage.mean, - _core.storage.weighted_mean, - } - - return Kind.MEAN if mean else Kind.COUNT - - def values(self, flow: bool = False) -> np.typing.NDArray[Any]: - """ - Returns the accumulated values. The counts for simple histograms, the - sum of weights for weighted histograms, the mean for profiles, etc. - - If counts is equal to 0, the value in that cell is undefined if - kind == "MEAN". - - :param flow: Enable flow bins. Not part of PlottableHistogram, but - included for consistency with other methods and flexibility. - - :return: "np.typing.NDArray[Any]"[np.float64] - """ - - view = self.view(flow) - # TODO: Might be a NumPy typing bug - if len(view.dtype) == 0: - return view - return view.value - - def variances(self, flow: bool = False) -> np.typing.NDArray[Any] | None: - """ - Returns the estimated variance of the accumulated values. The sum of squared - weights for weighted histograms, the variance of samples for profiles, etc. - For an unweighed histogram where kind == "COUNT", this should return the same - as values if the histogram was not filled with weights, and None otherwise. - If counts is equal to 1 or less, the variance in that cell is undefined if - kind == "MEAN". This must be written <= 1, and not < 2; when this - effective counts (weighed mean), then counts could be less than 2 but - more than 1. - - If kind == "MEAN", the counts can be used to compute the error on the mean - as sqrt(variances / counts), this works whether or not the entries are - weighted if the weight variance was tracked by the implementation. - - Currently, this always returns - but in the future, it will return None - if a weighted fill is made on a unweighed storage. - - :param flow: Enable flow bins. Not part of PlottableHistogram, but - included for consistency with other methods and flexibility. - - :return: "np.typing.NDArray[Any]"[np.float64] - """ - - view = self.view(flow) - if len(view.dtype) == 0: - return view if self._variance_known else None - - if hasattr(view, "sum_of_weights"): - valid = view.sum_of_weights**2 > view.sum_of_weights_squared # type: ignore[union-attr] - return np.divide( - view.variance, - view.sum_of_weights, - out=np.full(view.sum_of_weights.shape, np.nan), - where=valid, - ) - - if hasattr(view, "count"): - return np.divide( - view.variance, - view.count, - out=np.full(view.count.shape, np.nan), - where=view.count > 1, - ) - - return view.variance - - def counts(self, flow: bool = False) -> np.typing.NDArray[Any]: - """ - Returns the number of entries in each bin for an unweighted - histogram or profile and an effective number of entries (defined below) - for a weighted histogram or profile. An exotic generalized histogram could - have no sensible .counts, so this is Optional and should be checked by - Consumers. - - If kind == "MEAN", counts (effective or not) can and should be used to - determine whether the mean value and its variance should be displayed - (see documentation of values and variances, respectively). The counts - should also be used to compute the error on the mean (see documentation - of variances). - - For a weighted histogram, counts is defined as sum_of_weights ** 2 / - sum_of_weights_squared. It is equal or less than the number of times - the bin was filled, the equality holds when all filled weights are equal. - The larger the spread in weights, the smaller it is, but it is always 0 - if filled 0 times, and 1 if filled once, and more than 1 otherwise. - - :return: "np.typing.NDArray[Any]"[np.float64] - """ - - view = self.view(flow) - - if len(view.dtype) == 0: - return view - - if hasattr(view, "sum_of_weights"): - return np.divide( - view.sum_of_weights**2, - view.sum_of_weights_squared, # type: ignore[union-attr] - out=np.zeros_like(view.sum_of_weights, dtype=np.float64), - where=view.sum_of_weights_squared != 0, # type: ignore[union-attr] - ) - - if hasattr(view, "count"): - return view.count - - return view.value - - -if TYPE_CHECKING: - from uhi.typing.plottable import PlottableHistogram - - _: PlottableHistogram = typing.cast(Histogram, None) diff --git a/src/boost_histogram/histogram.py b/src/boost_histogram/histogram.py new file mode 100644 index 00000000..571e15dd --- /dev/null +++ b/src/boost_histogram/histogram.py @@ -0,0 +1,1285 @@ +from __future__ import annotations + +import collections.abc +import copy +import logging +import threading +import typing +import warnings +from enum import Enum +from os import cpu_count +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Iterable, + List, + Mapping, + NewType, + SupportsIndex, + Tuple, + TypeVar, + Union, +) + +import numpy as np + +import boost_histogram +from boost_histogram import _core + +from ._utils import cast, register +from .axis import AxesTuple, Axis, Variable +from .storage import Double, Storage +from .typing import Accumulator, ArrayLike, CppHistogram +from .view import MeanView, WeightedMeanView, WeightedSumView, _to_view + +if TYPE_CHECKING: + from builtins import ellipsis + +try: + from . import _core +except ImportError as err: + msg = str(err) + if "_core" not in msg: + raise + + new_msg = "Did you forget to compile boost-histogram? Use CMake or Setuptools to build, see the readme." + total_msg = f"{msg}\n{new_msg}" + + new_exception = type(err)(new_msg, name=err.name, path=err.path) + raise new_exception from err + + +# This is a StrEnum as defined in Python 3.10 +class Kind(str, Enum): + COUNT = "COUNT" + MEAN = "MEAN" + + # This cast + type ignore is really odd, so it deserves a quick + # explanation. If we just set this like StrEnum does, then mypy complains + # that the type is changing (str -> Kind). If we type: ignore, then + # MyPy claims that the type: ignore is not needed. If we cast, we get the + # same error as before. But if we cast and type: ignore, it now works. + # Will report to MyPy. Tested on 0.800. + __str__ = typing.cast(Callable[["Kind"], str], str.__str__) # type: ignore[assignment] + + +__all__ = [ + "Histogram", + "IndexingExpr", + "Kind", +] + + +def __dir__() -> list[str]: + return __all__ + + +# Support cloudpickle - pybind11 submodules do not have __file__ attributes +# And setting this in C++ causes a segfault +_core.accumulators.__file__ = _core.__file__ +_core.algorithm.__file__ = _core.__file__ +_core.axis.__file__ = _core.__file__ +_core.axis.transform.__file__ = _core.__file__ +_core.hist.__file__ = _core.__file__ +_core.storage.__file__ = _core.__file__ + + +NOTHING = object() + + +_histograms: set[type[CppHistogram]] = { + _core.hist.any_double, + _core.hist.any_int64, + _core.hist.any_atomic_int64, + _core.hist.any_unlimited, + _core.hist.any_weight, + _core.hist.any_mean, + _core.hist.any_weighted_mean, +} + +logger = logging.getLogger(__name__) + + +CppAxis = NewType("CppAxis", object) + +SimpleIndexing = Union[SupportsIndex, slice] +InnerIndexing = Union[SimpleIndexing, Callable[[Axis], int]] +FullInnerIndexing = Union[InnerIndexing, List[InnerIndexing]] +IndexingWithMapping = Union[FullInnerIndexing, Mapping[int, FullInnerIndexing]] +IndexingExpr = Union[IndexingWithMapping, Tuple[IndexingWithMapping, ...], "ellipsis"] + +T = TypeVar("T") + + +def _fill_cast( + value: T, *, inner: bool = False +) -> T | np.typing.NDArray[Any] | tuple[T, ...]: + """ + Convert to NumPy arrays. Some buffer objects do not get converted by forcecast. + If not called by itself (inner=False), then will work through one level of tuple/list. + """ + if value is None or isinstance(value, (str, bytes)): + return value + + if not inner and isinstance(value, (tuple, list)): + return tuple(_fill_cast(a, inner=True) for a in value) + + if hasattr(value, "__iter__") or hasattr(value, "__array__"): + return np.asarray(value) + + return value + + +def mean_storage_sample_check(sample: ArrayLike | None) -> None: + if sample is None: + raise TypeError("Sample key-argument (sample=) needs to be provided.") + seqs = (collections.abc.Sequence, np.ndarray) + msg1 = f"Sample key-argument needs to be a sequence, {sample.__class__.__name__} given." + if isinstance(sample, str) and not isinstance(sample, seqs): + raise ValueError(msg1) + sample_dim = np.array(sample).ndim + msg2 = f"Sample key-argument needs to be 1 dimensional, {sample_dim} given." + if sample_dim != 1: + raise ValueError(msg2) + + +def _arg_shortcut(item: tuple[int, float, float] | Axis | CppAxis) -> CppAxis: + if isinstance(item, tuple) and len(item) == 3: + msg = "Using () directly in constructor is a developer shortcut and will be removed in a future version" + warnings.warn(msg, FutureWarning, stacklevel=4) + return _core.axis.regular_uoflow(item[0], item[1], item[2]) # type: ignore[return-value] + + if isinstance(item, Axis): + return item._ax # type: ignore[no-any-return] + + raise TypeError("Only axes supported in histogram constructor") + + +def _expand_ellipsis(indexes: Iterable[Any], rank: int) -> list[Any]: + indexes = list(indexes) + number_ellipses = indexes.count(Ellipsis) + if number_ellipses == 0: + return indexes + if number_ellipses == 1: + index = indexes.index(Ellipsis) + additional = rank + 1 - len(indexes) + if additional < 0: + raise IndexError("too many indices for histogram") + + # Fill out the ellipsis with empty slices + return indexes[:index] + [slice(None)] * additional + indexes[index + 1 :] + + raise IndexError("an index can only have a single ellipsis ('...')") + + +H = TypeVar("H", bound="Histogram") + + +# We currently do not cast *to* a histogram, but this is consistent +# and could be used later. +@register(_histograms) # type: ignore[arg-type] +class Histogram: + # Note this is a __slots__ __dict__ class! + __slots__ = ( + "__dict__", + "_hist", + "axes", + ) + # .metadata and ._variance_known are part of the dict + + _family: ClassVar[object] = boost_histogram + + axes: AxesTuple + _hist: CppHistogram + _variance_known: bool + + def __init_subclass__(cls, *, family: object | None = None) -> None: + """ + Sets the family for the histogram. This should be a unique object (such + as the main module of your package) that is consistently set across all + subclasses. When converting back from C++, casting will try to always + pick the best matching family from the loaded subclasses for Axis and + such. + """ + super().__init_subclass__() + cls._family = family if family is not None else object() + + @typing.overload + def __init__(self, *args: Histogram) -> None: ... + + @typing.overload + def __init__(self, *args: CppHistogram, metadata: Any = ...) -> None: ... + + @typing.overload + def __init__( + self, + *axes: Axis | CppAxis, + storage: Storage = ..., + metadata: Any = ..., + ) -> None: ... + + def __init__( + self, + *axes: Axis | CppAxis | Histogram | CppHistogram, + storage: Storage = Double(), # noqa: B008 + metadata: Any = None, + ) -> None: + """ + Construct a new histogram. + + If you pass in a single argument, this will be treated as a + histogram and this will convert the histogram to this type of + histogram. + + Parameters + ---------- + *args : Axis + Provide 1 or more axis instances. + storage : Storage = bh.storage.Double() + Select a storage to use in the histogram + metadata : Any = None + Data that is passed along if a new histogram is created + """ + self._variance_known = True + + # Allow construction from a raw histogram object (internal) + if len(axes) == 1 and isinstance(axes[0], tuple(_histograms)): + cpp_hist: CppHistogram = axes[0] # type: ignore[assignment] + self._from_histogram_cpp(cpp_hist) + if metadata: + self.metadata = metadata + return + + # If we construct with another Histogram as the only positional argument, + # support that too + if len(axes) == 1 and isinstance(axes[0], Histogram): + normal_hist: Histogram = axes[0] + self._from_histogram_object(normal_hist) + if metadata: + self.metadata = metadata + return + + # Support objects that provide a to_boost method, like Uproot + if len(axes) == 1 and hasattr(axes[0], "_to_boost_histogram_"): + self._from_histogram_object(axes[0]._to_boost_histogram_()) + return + + if storage is None: + storage = Double() # type: ignore[unreachable] + + self.metadata = metadata + + # Check for missed parenthesis or incorrect types + if not isinstance(storage, Storage): + msg_storage = ( # type: ignore[unreachable] + "Passing in an initialized storage has been removed. Please add ()." + ) + msg_unknown = "Only storages allowed in storage argument" + raise KeyError(msg_storage if issubclass(storage, Storage) else msg_unknown) + + # Allow a tuple to represent a regular axis + axes = tuple(_arg_shortcut(arg) for arg in axes) # type: ignore[arg-type] + + if len(axes) > _core.hist._axes_limit: + msg = f"Too many axes, must be less than {_core.hist._axes_limit}" + raise IndexError(msg) + + # Check all available histograms, and if the storage matches, return that one + for h in _histograms: + if isinstance(storage, h._storage_type): + self._hist = h(axes, storage) # type: ignore[arg-type] + self.axes = self._generate_axes_() + return + + raise TypeError("Unsupported storage") + + @classmethod + def _clone( + cls: type[H], + _hist: Histogram | CppHistogram, + *, + other: Histogram | None = None, + memo: Any = NOTHING, + ) -> H: + """ + Clone a histogram (possibly of a different base). Does not trigger __init__. + This will copy data from `other=` if non-None, otherwise metadata gets copied from the input. + """ + + self = cls.__new__(cls) + if isinstance(_hist, tuple(_histograms)): + self._from_histogram_cpp(_hist) # type: ignore[arg-type] + if other is not None: + return cls._clone(self, other=other, memo=memo) + return self + + assert isinstance(_hist, Histogram) + + if other is None: + other = _hist + + self._from_histogram_object(_hist) + + if memo is NOTHING: + self.__dict__ = copy.copy(other.__dict__) + else: + self.__dict__ = copy.deepcopy(other.__dict__, memo) + + for ax in self.axes: + if memo is NOTHING: + ax.__dict__ = copy.copy(ax._ax.metadata) + else: + ax.__dict__ = copy.deepcopy(ax._ax.metadata, memo) + return self + + def _new_hist(self: H, _hist: CppHistogram, memo: Any = NOTHING) -> H: + """ + Return a new histogram given a new _hist, copying current metadata. + """ + return self.__class__._clone(_hist, other=self, memo=memo) + + def _from_histogram_cpp(self, other: CppHistogram) -> None: + """ + Import a Cpp histogram. + """ + self._variance_known = True + self._hist = other + self.metadata = None + self.axes = self._generate_axes_() + + def _from_histogram_object(self, other: Histogram) -> None: + """ + Convert self into a new histogram object based on another, possibly + converting from a different subclass. + """ + self._hist = other._hist + self.__dict__ = copy.copy(other.__dict__) + self.axes = self._generate_axes_() + for ax in self.axes: + ax.__dict__ = copy.copy(ax._ax.metadata) + + # Allow custom behavior on either "from" or "to" + other._export_bh_(self) + self._import_bh_() + + def _import_bh_(self) -> None: + """ + If any post-processing is needed to pass a histogram between libraries, a + subclass can implement it here. self is the new instance in the current + (converted-to) class. + """ + + @classmethod + def _export_bh_(cls, self: Histogram) -> None: + """ + If any preparation is needed to pass a histogram between libraries, a subclass can + implement it here. cls is the current class being converted from, and self is the + instance in the class being converted to. + """ + + def _generate_axes_(self) -> AxesTuple: + """ + This is called to fill in the axes. Subclasses can override it if they need + to change the axes tuple. + """ + + return AxesTuple(self._axis(i) for i in range(self.ndim)) + + @property + def ndim(self) -> int: + """ + Number of axes (dimensions) of the histogram. + """ + return self._hist.rank() + + def view( + self, flow: bool = False + ) -> np.typing.NDArray[Any] | WeightedSumView | WeightedMeanView | MeanView: + """ + Return a view into the data, optionally with overflow turned on. + """ + return _to_view(self._hist.view(flow)) + + def __array__( + self, + dtype: np.typing.DTypeLike | None = None, + *, + # pylint: disable-next=redefined-outer-name + copy: bool | None = None, + ) -> np.typing.NDArray[Any]: + # The copy kw is new in NumPy 2.0 + kwargs = {} + if copy is not None: + kwargs["copy"] = copy + return np.asarray(self.view(False), dtype=dtype, **kwargs) # type: ignore[call-overload] + + def __eq__(self, other: Any) -> bool: + return hasattr(other, "_hist") and self._hist == other._hist + + def __ne__(self, other: Any) -> bool: + return (not hasattr(other, "_hist")) or self._hist != other._hist + + def __add__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + result = self.copy(deep=False) + return result.__iadd__(other) + + def __iadd__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + if isinstance(other, (int, float)) and other == 0: + return self + self._compute_inplace_op("__iadd__", other) + + # Addition may change the axes if they can grow + self.axes = self._generate_axes_() + + return self + + def __radd__(self: H, other: np.typing.NDArray[Any] | float) -> H: + return self + other + + def __sub__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + result = self.copy(deep=False) + return result.__isub__(other) + + def __isub__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + if isinstance(other, (int, float)) and other == 0: + return self + self._compute_inplace_op("__isub__", other) + + self.axes = self._generate_axes_() + + return self + + # If these fail, the underlying object throws the correct error + def __mul__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + result = self.copy(deep=False) + return result._compute_inplace_op("__imul__", other) + + def __rmul__(self: H, other: np.typing.NDArray[Any] | float) -> H: + return self * other + + def __truediv__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + result = self.copy(deep=False) + return result._compute_inplace_op("__itruediv__", other) + + def __div__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + result = self.copy(deep=False) + return result._compute_inplace_op("__idiv__", other) + + def __idiv__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + return self._compute_inplace_op("__idiv__", other) + + def __itruediv__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + return self._compute_inplace_op("__itruediv__", other) + + def __imul__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: + return self._compute_inplace_op("__imul__", other) + + def _compute_inplace_op( + self: H, name: str, other: Histogram | np.typing.NDArray[Any] | float + ) -> H: + # Also takes CppHistogram, but that confuses mypy because it's hard to pick out + if isinstance(other, Histogram): + getattr(self._hist, name)(other._hist) + elif isinstance(other, tuple(_histograms)): + getattr(self._hist, name)(other) + elif hasattr(other, "shape") and other.shape: + assert not isinstance(other, float) + + if len(other.shape) != self.ndim: + msg = f"Number of dimensions {len(other.shape)} must match histogram {self.ndim}" + raise ValueError(msg) + + if all(a in {b, 1} for a, b in zip(other.shape, self.shape)): + view = self.view(flow=False) + getattr(view, name)(other) + elif all(a in {b, 1} for a, b in zip(other.shape, self.axes.extent)): + view = self.view(flow=True) + getattr(view, name)(other) + else: + msg = f"Wrong shape {other.shape}, expected {self.shape} or {self.axes.extent}" + raise ValueError(msg) + else: + view = self.view(flow=True) + getattr(view, name)(other) + + self._variance_known = False + return self + + # TODO: Marked as too complex by flake8. Should be factored out a bit. + def fill( + self: H, + *args: ArrayLike | str, + weight: ArrayLike | None = None, + sample: ArrayLike | None = None, + threads: int | None = None, + ) -> H: + """ + Insert data into the histogram. + + Parameters + ---------- + *args : Union[Array[float], Array[int], Array[str], float, int, str] + Provide one value or array per dimension. + weight : List[Union[Array[float], Array[int], float, int, str]]] + Provide weights (only if the histogram storage supports it) + sample : List[Union[Array[float], Array[int], Array[str], float, int, str]]] + Provide samples (only if the histogram storage supports it) + threads : Optional[int] + Fill with threads. Defaults to None, which does not activate + threaded filling. Using 0 will automatically pick the number of + available threads (usually two per core). + """ + + if self._hist._storage_type is _core.storage.mean: + mean_storage_sample_check(sample) + + if ( + self._hist._storage_type + not in { + _core.storage.weight, + _core.storage.mean, + _core.storage.weighted_mean, + } + and weight is not None + ): + self._variance_known = False + + # Convert to NumPy arrays + args_ars = _fill_cast(args) + weight_ars = _fill_cast(weight) + sample_ars = _fill_cast(sample) + + if threads == 0: + threads = cpu_count() + + if threads is None or threads == 1: + self._hist.fill(*args_ars, weight=weight_ars, sample=sample_ars) + return self + + if self._hist._storage_type in { + _core.storage.mean, + _core.storage.weighted_mean, + }: + raise RuntimeError("Mean histograms do not support threaded filling") + + data: list[list[np.typing.NDArray[Any]] | list[str]] = [ + np.array_split(a, threads) if not isinstance(a, str) else [a] * threads + for a in args_ars + ] + + weights: list[Any] + if weight is None or np.isscalar(weight): + assert threads is not None + weights = [weight_ars] * threads + else: + weights = np.array_split(weight_ars, threads) + + samples: list[Any] + if sample_ars is None or np.isscalar(sample_ars): + assert threads is not None + samples = [sample_ars] * threads + else: + samples = np.array_split(sample_ars, threads) + + if self._hist._storage_type is _core.storage.atomic_int64: + + def fun( + weight: ArrayLike | None, + sample: ArrayLike | None, + *args: np.typing.NDArray[Any], + ) -> None: + self._hist.fill(*args, weight=weight, sample=sample) + + else: + sum_lock = threading.Lock() + + def fun( + weight: ArrayLike | None, + sample: ArrayLike | None, + *args: np.typing.NDArray[Any], + ) -> None: + local_hist = copy.copy(self._hist) + local_hist.reset() + local_hist.fill(*args, weight=weight, sample=sample) + with sum_lock: + self._hist += local_hist + + thread_list = [ + threading.Thread(target=fun, args=arrays) + for arrays in zip(weights, samples, *data) + ] + + for thread in thread_list: + thread.start() + + for thread in thread_list: + thread.join() + + return self + + def __str__(self) -> str: + """ + A rendering of the histogram is made using ASCII or unicode characters + (whatever is supported by the terminal). What exactly is displayed is + still experimental. Do not rely on any particular rendering. + """ + # TODO check the terminal width and adjust the presentation + # only use for 1D, fall back to repr for ND + if self._hist.rank() != 1: + return repr(self) + s = str(self._hist) + # get rid of first line and last character + return s[s.index("\n") + 1 : -1] + + def _axis(self, i: int = 0) -> Axis: + """ + Get N-th axis. + """ + return cast(self, self._hist.axis(i), Axis) + + @property + def storage_type(self) -> type[Storage]: + return cast(self, self._hist._storage_type, Storage) # type: ignore[return-value] + + @property + def _storage_type(self) -> type[Storage]: + warnings.warn( + "Accessing storage type has changed from _storage_type to storage_type, and will be removed in future.", + DeprecationWarning, + stacklevel=2, + ) + return cast(self, self._hist._storage_type, Storage) # type: ignore[return-value] + + def _reduce(self: H, *args: Any) -> H: + return self._new_hist(self._hist.reduce(*args)) + + def __copy__(self: H) -> H: + return self._new_hist(copy.copy(self._hist)) + + def __deepcopy__(self: H, memo: Any) -> H: + return self._new_hist(copy.deepcopy(self._hist), memo=memo) + + def __getstate__(self) -> tuple[int, dict[str, Any]]: + """ + Version 0.8: metadata added + Version 0.11: version added and set to 0. metadata/_hist replaced with dict. + Version 0.12: _variance_known is now in the dict (no format change) + + ``dict`` contains __dict__ with added "_hist" + """ + local_dict = copy.copy(self.__dict__) + local_dict["_hist"] = self._hist + # Version 0 of boost-histogram pickle state + return (0, local_dict) + + def __setstate__(self, state: Any) -> None: + if isinstance(state, tuple): + if state[0] == 0: + for key, value in state[1].items(): + setattr(self, key, value) + + # Added in 0.12 + if "_variance_known" not in state[1]: + self._variance_known = True + else: + msg = f"Cannot open boost-histogram pickle v{state[0]}" + raise RuntimeError(msg) + + else: # Classic (0.10 and before) state + self._hist = state["_hist"] + self._variance_known = True + self.metadata = state.get("metadata", None) + for i in range(self._hist.rank()): + self._hist.axis(i).metadata = {"metadata": self._hist.axis(i).metadata} + + self.axes = self._generate_axes_() + + def __repr__(self) -> str: + newline = "\n " + first_newline = newline if len(self.axes) > 1 else "" + storage_newline = ( + newline if len(self.axes) > 1 else " " if len(self.axes) > 0 else "" + ) + sep = "," if len(self.axes) > 0 else "" + ret = f"{self.__class__.__name__}({first_newline}" + ret += f",{newline}".join(repr(ax) for ax in self.axes) + ret += f"{sep}{storage_newline}storage={self.storage_type()}" # pylint: disable=not-callable + ret += ")" + outer = self.sum(flow=True) + if outer: + inner = self.sum(flow=False) + ret += f" # Sum: {inner}" + if inner != outer: + ret += f" ({outer} with flow)" + return ret + + def _compute_uhi_index(self, index: InnerIndexing, axis: int) -> SimpleIndexing: + """ + Converts an expression that contains UHI locators to one that does not. + """ + # Support sum and rebin directly + if index is sum or hasattr(index, "factor"): # type: ignore[comparison-overlap] + return slice(None, None, index) + + # General locators + # Note that MyPy doesn't like these very much - the fix + # will be to properly set input types + if callable(index): + return index(self.axes[axis]) + + if isinstance(index, float): + raise TypeError(f"Index {index} must be an integer, not float") + + if isinstance(index, SupportsIndex): + if abs(int(index)) >= self._hist.axis(axis).size: + raise IndexError("histogram index is out of range") + return int(index) % self._hist.axis(axis).size + + return index + + def _compute_commonindex( + self, index: IndexingExpr + ) -> list[SupportsIndex | slice | Mapping[int, SupportsIndex | slice]]: + """ + Takes indices and returns two iterables; one is a tuple or dict of the + original, Ellipsis expanded index, and the other returns index, + operation value pairs. + """ + indexes: list[Any] + + # Shorten the computations with direct access to raw object + hist = self._hist + + # Support dict access + if hasattr(index, "items"): + indexes = [slice(None)] * hist.rank() + for k, v in index.items(): + indexes[k] = v + + # Normalize -> h[i] == h[i,] + else: + tuple_index = (index,) if not isinstance(index, tuple) else index + + # Now a list + indexes = _expand_ellipsis(tuple_index, hist.rank()) + + if len(indexes) != hist.rank(): + raise IndexError("Wrong number of indices for histogram") + + # Allow [bh.loc(...)] to work + # TODO: could be nicer making a new list via a comprehension + for i in range(len(indexes)): # pylint: disable=consider-using-enumerate + # Support list of UHI indexers + if isinstance(indexes[i], list): + indexes[i] = [self._compute_uhi_index(ind, i) for ind in indexes[i]] + else: + indexes[i] = self._compute_uhi_index(indexes[i], i) + + return indexes + + def to_numpy( + self, flow: bool = False, *, dd: bool = False, view: bool = False + ) -> ( + tuple[np.typing.NDArray[Any], ...] + | tuple[np.typing.NDArray[Any], tuple[np.typing.NDArray[Any], ...]] + ): + """ + Convert to a NumPy style tuple of return arrays. Edges are converted to + match NumPy standards, with upper edge inclusive, unlike + boost-histogram, where upper edge is exclusive. + + Parameters + ---------- + flow : bool = False + Include the flow bins. + dd : bool = False + Use the histogramdd return syntax, where the edges are in a tuple. + Otherwise, this is the histogram/histogram2d return style. + view : bool = False + The behavior for the return value. By default, this will return + array of the values only regardless of the storage (which is all + NumPy's histogram function can do). view=True will return the + boost-histogram view of the storage. + + Return + ------ + contents : Array[Any] + The bin contents + *edges : Array[float] + The edges for each dimension + """ + + hist, *edges = self._hist.to_numpy(flow) + hist = self.view(flow=flow) if view else self.values(flow=flow) + + return (hist, edges) if dd else (hist, *edges) + + def copy(self: H, *, deep: bool = True) -> H: + """ + Make a copy of the histogram. Defaults to making a + deep copy (axis metadata copied); use deep=False + to avoid making a copy of axis metadata. + """ + + return copy.deepcopy(self) if deep else copy.copy(self) + + def reset(self: H) -> H: + """ + Clear the bin counters. + """ + self._hist.reset() + return self + + def empty(self, flow: bool = False) -> bool: + """ + Check to see if the histogram has any non-default values. + You can use flow=True to check flow bins too. + """ + return self._hist.empty(flow) + + def sum(self, flow: bool = False) -> float | Accumulator: + """ + Compute the sum over the histogram bins (optionally including the flow bins). + """ + return self._hist.sum(flow) # type: ignore[no-any-return] + + @property + def size(self) -> int: + """ + Total number of bins in the histogram (including underflow/overflow). + """ + return self._hist.size() + + @property + def shape(self) -> tuple[int, ...]: + """ + Tuple of axis sizes (not including underflow/overflow). + """ + return self.axes.size + + # TODO: Marked as too complex by flake8. Should be factored out a bit. + def __getitem__(self: H, index: IndexingExpr) -> H | float | Accumulator: + indexes = self._compute_commonindex(index) + + # If this is (now) all integers, return the bin contents + # But don't try *dict! + if not hasattr(indexes, "items") and all( + isinstance(a, SupportsIndex) for a in indexes + ): + return self._hist.at(*indexes) # type: ignore[no-any-return, arg-type] + + integrations: set[int] = set() + slices: list[_core.algorithm.reduce_command] = [] + pick_each: dict[int, int] = {} + pick_set: dict[int, list[int]] = {} + reduced: CppHistogram | None = None + + # Compute needed slices and projections + for i, ind in enumerate(indexes): + if isinstance(ind, SupportsIndex): + pick_each[i] = ind.__index__() + ( + 1 if self.axes[i].traits.underflow else 0 + ) + continue + + if isinstance(ind, collections.abc.Sequence): + pick_set[i] = list(ind) + continue + + if not isinstance(ind, slice): + raise IndexError( + "Must be a slice, an integer, or follow the locator protocol." + ) + + # If the dictionary brackets are forgotten, it's easy to put a slice + # into a slice - adding a nicer error message in that case + if any(isinstance(v, slice) for v in (ind.start, ind.stop, ind.step)): + raise TypeError( + "You have put a slice in a slice. Did you forget curly braces [{...}]?" + ) + + # This ensures that callable start/stop are handled + start, stop = self.axes[i]._process_loc(ind.start, ind.stop) + + groups = [] + if ind != slice(None): + merge = 1 + if ind.step is not None: + if getattr(ind.step, "factor", None) is not None: + merge = ind.step.factor + elif ( + hasattr(ind.step, "group_mapping") + and (tmp_groups := ind.step.group_mapping(self.axes[i])) + is not None + ): + groups = tmp_groups + elif callable(ind.step): + if ind.step is sum: + integrations.add(i) + else: + raise NotImplementedError + + if ind.start is not None or ind.stop is not None: + slices.append( + _core.algorithm.slice( + i, start, stop, _core.algorithm.slice_mode.crop + ) + ) + if len(groups) == 0: + continue + else: + raise IndexError( + "The third argument to a slice must be rebin or projection" + ) + + assert isinstance(start, int) + assert isinstance(stop, int) + # rebinning with factor + if len(groups) == 0: + slices.append( + _core.algorithm.slice_and_rebin(i, start, stop, merge) + ) + # rebinning with groups + elif len(groups) != 0: + if not reduced: + reduced = self._hist + axes = [reduced.axis(x) for x in range(reduced.rank())] + reduced_view = reduced.view(flow=True) + new_axes_indices = [axes[i].edges[0]] + + j = 0 + for group in groups: + new_axes_indices += [axes[i].edges[j + group]] + j += group + + variable_axis = Variable( + new_axes_indices, metadata=axes[i].metadata + ) + axes[i] = variable_axis._ax + + logger.debug("Axes: %s", axes) + + new_reduced = reduced.__class__(axes) + new_view = new_reduced.view(flow=True) + + j = 1 + for new_j, group in enumerate(groups): + for _ in range(group): + pos = [slice(None)] * (i) + new_view[(*pos, new_j + 1, ...)] += _to_view( + reduced_view[(*pos, j, ...)] + ) + j += 1 + + reduced = new_reduced + + # Will be updated below + if (slices or pick_set or pick_each or integrations) and not reduced: + reduced = self._hist + elif not reduced: + reduced = copy.copy(self._hist) + + if pick_each: + tuple_slice = tuple( + pick_each.get(i, slice(None)) for i in range(reduced.rank()) + ) + logger.debug("Slices for pick each: %s", tuple_slice) + axes = [ + reduced.axis(i) for i in range(reduced.rank()) if i not in pick_each + ] + logger.debug("Axes: %s", axes) + new_reduced = reduced.__class__(axes) + new_reduced.view(flow=True)[...] = reduced.view(flow=True)[tuple_slice] + reduced = new_reduced + integrations = {i - sum(j <= i for j in pick_each) for i in integrations} + pick_set = { + i - sum(j <= i for j in pick_each): v for i, v in pick_set.items() + } + for slice_ in slices: + slice_.iaxis -= sum(j <= slice_.iaxis for j in pick_each) + + if slices: + logger.debug("Reduce with %s", slices) + reduced = reduced.reduce(*slices) + + if pick_set: + warnings.warn( + "List indexing selection is experimental. Removed bins are not placed in overflow.", + stacklevel=2, + ) + logger.debug("Slices for picking sets: %s", pick_set) + axes = [reduced.axis(i) for i in range(reduced.rank())] + reduced_view = reduced.view(flow=True) + for i in pick_set: # pylint: disable=consider-using-dict-items + selection = copy.copy(pick_set[i]) + ax = reduced.axis(i) + if ax.traits_ordered: + msg = f"Axis {i} is not a categorical axis, cannot pick with list: {ax}" + raise RuntimeError(msg) + + if ax.traits_overflow and ax.size not in pick_set[i]: + selection.append(ax.size) + + new_axis = axes[i].__class__([axes[i].value(j) for j in pick_set[i]]) # type: ignore[call-arg] + new_axis.metadata = axes[i].metadata + axes[i] = new_axis + reduced_view = np.take(reduced_view, selection, axis=i) + + logger.debug("Axes: %s", axes) + new_reduced = reduced.__class__(axes) + new_reduced.view(flow=True)[...] = reduced_view + reduced = new_reduced + + if integrations: + projections = [i for i in range(reduced.rank()) if i not in integrations] + reduced = reduced.project(*projections) + + return self._new_hist(reduced) if reduced.rank() > 0 else reduced.sum(flow=True) + + def __setitem__(self, index: IndexingExpr, value: ArrayLike | Accumulator) -> None: + """ + There are several supported possibilities: + + h[slice] = array # same size + + If an array is given to a compatible slice, it is set. + + h[a:] = array # One larger + + If an array is given that does not match, if it does match the + with-overflow size, it fills that. + + PLANNED (not yet supported): + + h[a:] = h2 + + If another histogram is given, that must either match with or without + overflow, where the overflow bins must be overflow bins (that is, + you cannot set a histogram's flow bins from another histogram that + is 2 larger). Bin edges must be a close match, as well. If you don't + want this level of type safety, just use ``h[...] = h2.view()``. + """ + indexes = self._compute_commonindex(index) + + if isinstance(value, Histogram): + raise TypeError("Not supported yet") + + value = np.asarray(value) + view = self.view(flow=True) + + value_shape: tuple[int, ...] + # Support raw arrays for accumulators, the final dimension is the constructor values + if ( + value.ndim > 0 + and len(view.dtype) > 0 + and len(value.dtype) == 0 + and len(view.dtype) == value.shape[-1] + ): + value_shape = value.shape[:-1] + value_ndim = value.ndim - 1 + else: + value_shape = value.shape + value_ndim = value.ndim + + # NumPy does not broadcast partial slices, but we would need + # to allow it (because we do allow broadcasting up dimensions) + # Instead, we simply require matching dimensions. + if value_ndim > 0 and value_ndim != sum(isinstance(i, slice) for i in indexes): + msg = f"Setting a {len(indexes)}D histogram with a {value_ndim}D array must have a matching number of dimensions" + raise ValueError(msg) + + # Here, value_n does not increment with n if this is not a slice + value_n = 0 + for n, request in enumerate(indexes): + has_underflow = self.axes[n].traits.underflow + has_overflow = self.axes[n].traits.overflow + + if isinstance(request, slice): + # Only consider underflow/overflow if the endpoints are not given + use_underflow = has_underflow and request.start is None + use_overflow = has_overflow and request.stop is None + + # Make the limits explicit since we may need to shift them + start = 0 if request.start is None else request.start + stop = len(self.axes[n]) if request.stop is None else request.stop + request_len = stop - start + + # If set to a scalar, then treat it like broadcasting without flow bins + if value_ndim == 0: + start = 0 + has_overflow + stop = len(self.axes[n]) + has_underflow + + # Normal setting + elif request_len == value_shape[value_n]: + start += has_underflow + stop += has_underflow + + # Expanded setting + elif request_len + use_underflow + use_overflow == value_shape[value_n]: + start += has_underflow and not use_underflow + stop += has_underflow + (has_overflow and use_overflow) + + # Single element broadcasting + elif value_shape[value_n] == 1: + start += has_underflow + stop += has_underflow + + else: + msg = f"Mismatched shapes in dimension {n}" + msg += f", {value_shape[n]} != {request_len}" + if use_underflow or use_overflow: + msg += f" or {request_len + use_underflow + use_overflow}" + raise ValueError(msg) + indexes[n] = slice(start, stop, request.step) + value_n += 1 + else: + indexes[n] = request + has_underflow + + view[tuple(indexes)] = value # type: ignore[arg-type] + + def project(self: H, *args: int) -> H | float | Accumulator: + """ + Project to a single axis or several axes on a multidimensional histogram. + Provided a list of axis numbers, this will produce the histogram over + those axes only. Flow bins are used if available. + """ + for arg in args: + if arg < 0 or arg >= self.ndim: + raise ValueError( + f"Projection axis must be a valid axis number 0 to {self.ndim - 1}, not {arg}" + ) + + return self._new_hist(self._hist.project(*args)) + + # Implementation of PlottableHistogram + + @property + def kind(self) -> Kind: + """ + Returns Kind.COUNT if this is a normal summing histogram, and Kind.MEAN if this is a + mean histogram. + + :return: Kind + """ + mean = self._hist._storage_type in { + _core.storage.mean, + _core.storage.weighted_mean, + } + + return Kind.MEAN if mean else Kind.COUNT + + def values(self, flow: bool = False) -> np.typing.NDArray[Any]: + """ + Returns the accumulated values. The counts for simple histograms, the + sum of weights for weighted histograms, the mean for profiles, etc. + + If counts is equal to 0, the value in that cell is undefined if + kind == "MEAN". + + :param flow: Enable flow bins. Not part of PlottableHistogram, but + included for consistency with other methods and flexibility. + + :return: "np.typing.NDArray[Any]"[np.float64] + """ + + view = self.view(flow) + # TODO: Might be a NumPy typing bug + if len(view.dtype) == 0: + return view + return view.value + + def variances(self, flow: bool = False) -> np.typing.NDArray[Any] | None: + """ + Returns the estimated variance of the accumulated values. The sum of squared + weights for weighted histograms, the variance of samples for profiles, etc. + For an unweighed histogram where kind == "COUNT", this should return the same + as values if the histogram was not filled with weights, and None otherwise. + If counts is equal to 1 or less, the variance in that cell is undefined if + kind == "MEAN". This must be written <= 1, and not < 2; when this + effective counts (weighed mean), then counts could be less than 2 but + more than 1. + + If kind == "MEAN", the counts can be used to compute the error on the mean + as sqrt(variances / counts), this works whether or not the entries are + weighted if the weight variance was tracked by the implementation. + + Currently, this always returns - but in the future, it will return None + if a weighted fill is made on a unweighed storage. + + :param flow: Enable flow bins. Not part of PlottableHistogram, but + included for consistency with other methods and flexibility. + + :return: "np.typing.NDArray[Any]"[np.float64] + """ + + view = self.view(flow) + if len(view.dtype) == 0: + return view if self._variance_known else None + + if hasattr(view, "sum_of_weights"): + valid = view.sum_of_weights**2 > view.sum_of_weights_squared # type: ignore[union-attr] + return np.divide( + view.variance, + view.sum_of_weights, + out=np.full(view.sum_of_weights.shape, np.nan), + where=valid, + ) + + if hasattr(view, "count"): + return np.divide( + view.variance, + view.count, + out=np.full(view.count.shape, np.nan), + where=view.count > 1, + ) + + return view.variance + + def counts(self, flow: bool = False) -> np.typing.NDArray[Any]: + """ + Returns the number of entries in each bin for an unweighted + histogram or profile and an effective number of entries (defined below) + for a weighted histogram or profile. An exotic generalized histogram could + have no sensible .counts, so this is Optional and should be checked by + Consumers. + + If kind == "MEAN", counts (effective or not) can and should be used to + determine whether the mean value and its variance should be displayed + (see documentation of values and variances, respectively). The counts + should also be used to compute the error on the mean (see documentation + of variances). + + For a weighted histogram, counts is defined as sum_of_weights ** 2 / + sum_of_weights_squared. It is equal or less than the number of times + the bin was filled, the equality holds when all filled weights are equal. + The larger the spread in weights, the smaller it is, but it is always 0 + if filled 0 times, and 1 if filled once, and more than 1 otherwise. + + :return: "np.typing.NDArray[Any]"[np.float64] + """ + + view = self.view(flow) + + if len(view.dtype) == 0: + return view + + if hasattr(view, "sum_of_weights"): + return np.divide( + view.sum_of_weights**2, + view.sum_of_weights_squared, # type: ignore[union-attr] + out=np.zeros_like(view.sum_of_weights, dtype=np.float64), + where=view.sum_of_weights_squared != 0, # type: ignore[union-attr] + ) + + if hasattr(view, "count"): + return view.count + + return view.value + + +if TYPE_CHECKING: + from uhi.typing.plottable import PlottableHistogram + + _: PlottableHistogram = typing.cast(Histogram, None) From f04c09bbf151d06fc6753c65a460ec9a2286edb3 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Thu, 30 Jan 2025 10:34:46 -0500 Subject: [PATCH 12/16] chore: more Ruff checks Signed-off-by: Henry Schreiner --- docs/api/boost_histogram.axis.rst | 1 - docs/api/boost_histogram.rst | 1 - pyproject.toml | 10 ++++- scripts/performance_report.py | 0 src/boost_histogram/_core/__init__.pyi | 2 - src/boost_histogram/_core/accumulators.pyi | 38 ++++++++---------- src/boost_histogram/_core/algorithm.pyi | 3 -- src/boost_histogram/_core/axis/__init__.pyi | 16 +++----- src/boost_histogram/_core/axis/transform.pyi | 11 ++--- src/boost_histogram/_core/hist.pyi | 42 +++++++++----------- src/boost_histogram/_core/storage.pyi | 15 +++---- src/boost_histogram/axis/__init__.py | 6 +-- src/boost_histogram/histogram.py | 4 +- src/boost_histogram/storage.py | 12 +++--- src/boost_histogram/version.pyi | 2 - src/boost_histogram/view.py | 2 +- 16 files changed, 73 insertions(+), 92 deletions(-) mode change 100644 => 100755 scripts/performance_report.py diff --git a/docs/api/boost_histogram.axis.rst b/docs/api/boost_histogram.axis.rst index 39978d2b..c88aa179 100644 --- a/docs/api/boost_histogram.axis.rst +++ b/docs/api/boost_histogram.axis.rst @@ -14,4 +14,3 @@ boost\_histogram.axis.transform :members: :undoc-members: :show-inheritance: - diff --git a/docs/api/boost_histogram.rst b/docs/api/boost_histogram.rst index 79607999..3423a5b6 100644 --- a/docs/api/boost_histogram.rst +++ b/docs/api/boost_histogram.rst @@ -65,4 +65,3 @@ boost\_histogram.view :members: :undoc-members: :show-inheritance: - diff --git a/pyproject.toml b/pyproject.toml index ee6f9990..d749a9c9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -214,17 +214,22 @@ messages_control.disable = [ [tool.ruff.lint] extend-select = [ - "B", # flake8-bugbear - "I", # isort "ARG", # flake8-unused-arguments + "B", # flake8-bugbear "C4", # flake8-comprehensions + "EXE", # flake8-executable + "FURB", # refurb + "G", # flake8-logging-format + "I", # isort "ICN", # flake8-import-conventions "ISC", # flake8-implicit-str-concat + "PD", # pandas-vet "PGH", # pygrep-hooks "PIE", # flake8-pie "PL", # pylint "PT", # flake8-pytest-style "PTH", # flake8-use-pathlib + "PYI", # flake8-pyi "RET", # flake8-return "RUF", # Ruff-specific "SIM", # flake8-simplify @@ -238,6 +243,7 @@ ignore = [ "E501", # Line too long "PT011", "PT013", # Incorrect pytest codes "ISC001", # Conflicts with the formatter + "PYI034", # We are returning Self, just generic ] typing-modules = ["boost_histogram.typing"] isort.required-imports = ["from __future__ import annotations"] diff --git a/scripts/performance_report.py b/scripts/performance_report.py old mode 100644 new mode 100755 diff --git a/src/boost_histogram/_core/__init__.pyi b/src/boost_histogram/_core/__init__.pyi index 359f5732..106e6987 100644 --- a/src/boost_histogram/_core/__init__.pyi +++ b/src/boost_histogram/_core/__init__.pyi @@ -1,3 +1 @@ -from __future__ import annotations - from . import accumulators, algorithm, axis, hist, storage diff --git a/src/boost_histogram/_core/accumulators.pyi b/src/boost_histogram/_core/accumulators.pyi index 1ff58dae..cc34a47e 100644 --- a/src/boost_histogram/_core/accumulators.pyi +++ b/src/boost_histogram/_core/accumulators.pyi @@ -1,18 +1,14 @@ -from __future__ import annotations - -from typing import Any, Tuple, TypeVar, overload +from typing import Any, overload from numpy.typing import ArrayLike - -T = TypeVar("T", bound="_BaseAccumulator") +from typing_extensions import Self class _BaseAccumulator: - def __eq__(self, other: Any) -> bool: ... - def __ne__(self, other: Any) -> bool: ... - def __imul__(self: T, other: float) -> T: ... - def __repr__(self) -> str: ... - def __copy__(self: T) -> T: ... - def __deepcopy__(self: T, memo: Any) -> T: ... + def __eq__(self, other: object) -> bool: ... + def __ne__(self, other: object) -> bool: ... + def __imul__(self, other: float) -> Self: ... + def __copy__(self) -> Self: ... + def __deepcopy__(self, memo: Any) -> Self: ... def _ipython_key_completions_(self) -> tuple[str, ...]: ... class WeightedSum(_BaseAccumulator): @@ -26,8 +22,8 @@ class WeightedSum(_BaseAccumulator): def value(self) -> float: ... @property def variance(self) -> float: ... - def __iadd__(self: T, arg0: float) -> T: ... - def fill(self: T, value: ArrayLike, variance: ArrayLike | None = None) -> T: ... + def __iadd__(self, arg0: float) -> Self: ... + def fill(self, value: ArrayLike, variance: ArrayLike | None = None) -> Self: ... @staticmethod def _make(a: ArrayLike, b: ArrayLike) -> WeightedSum: ... @staticmethod @@ -42,8 +38,8 @@ class Sum(_BaseAccumulator): def __init__(self, value: float) -> None: ... @property def value(self) -> float: ... - def __iadd__(self: T, arg0: float) -> T: ... - def fill(self: T, value: ArrayLike) -> T: ... + def __iadd__(self, arg0: float) -> Self: ... + def fill(self, value: ArrayLike) -> Self: ... @property def _small(self) -> float: ... @property @@ -71,9 +67,9 @@ class WeightedMean(_BaseAccumulator): @property def variance(self) -> float: ... def __call__( - self: T, value: ArrayLike, *, weight: ArrayLike | None = None - ) -> T: ... - def fill(self: T, value: ArrayLike, *, weight: ArrayLike | None = None) -> T: ... + self, value: ArrayLike, *, weight: ArrayLike | None = None + ) -> Self: ... + def fill(self, value: ArrayLike, *, weight: ArrayLike | None = None) -> Self: ... @staticmethod def _make( arg0: ArrayLike, arg1: ArrayLike, arg2: ArrayLike, arg3: ArrayLike @@ -99,9 +95,9 @@ class Mean(_BaseAccumulator): @property def variance(self) -> float: ... def __call__( - self: T, value: ArrayLike, *, weight: ArrayLike | None = None - ) -> T: ... - def fill(self: T, value: ArrayLike, *, weight: ArrayLike | None = None) -> T: ... + self, value: ArrayLike, *, weight: ArrayLike | None = None + ) -> Self: ... + def fill(self, value: ArrayLike, *, weight: ArrayLike | None = None) -> Self: ... @staticmethod def _make(arg0: ArrayLike, arg1: ArrayLike, arg2: ArrayLike) -> Mean: ... @staticmethod diff --git a/src/boost_histogram/_core/algorithm.pyi b/src/boost_histogram/_core/algorithm.pyi index bd1ef66c..0a918679 100644 --- a/src/boost_histogram/_core/algorithm.pyi +++ b/src/boost_histogram/_core/algorithm.pyi @@ -1,11 +1,8 @@ -from __future__ import annotations - import enum import typing class reduce_command: iaxis: int - def __repr__(self) -> str: ... class slice_mode(enum.Enum): shrink = enum.auto() diff --git a/src/boost_histogram/_core/axis/__init__.pyi b/src/boost_histogram/_core/axis/__init__.pyi index 77e8c443..4c99aeb6 100644 --- a/src/boost_histogram/_core/axis/__init__.pyi +++ b/src/boost_histogram/_core/axis/__init__.pyi @@ -1,21 +1,17 @@ -from __future__ import annotations - from typing import Any, Iterable, Iterator, Tuple, TypeVar import numpy as np from numpy.typing import ArrayLike +from typing_extensions import Self from . import transform -T = TypeVar("T", bound="_BaseAxis") - class _BaseAxis: - def __eq__(self, other: Any) -> bool: ... - def __ne__(self, other: Any) -> bool: ... - def __imul__(self: T, other: float) -> T: ... - def __repr__(self) -> str: ... - def __copy__(self: T) -> T: ... - def __deepcopy__(self: T, memo: Any) -> T: ... + def __eq__(self, other: object) -> bool: ... + def __ne__(self, other: object) -> bool: ... + def __imul__(self, other: float) -> Self: ... + def __copy__(self) -> Self: ... + def __deepcopy__(self, memo: Any) -> Self: ... def _ipython_key_completions_(self) -> tuple[str, ...]: ... @property def traits_underflow(self) -> bool: ... diff --git a/src/boost_histogram/_core/axis/transform.pyi b/src/boost_histogram/_core/axis/transform.pyi index 83a668d6..9cfb372d 100644 --- a/src/boost_histogram/_core/axis/transform.pyi +++ b/src/boost_histogram/_core/axis/transform.pyi @@ -1,8 +1,6 @@ -from __future__ import annotations +from typing import Any, Callable -from typing import Any, Callable, TypeVar - -T = TypeVar("T", bound="_BaseTransform") +from typing_extensions import Self def _log_fn(arg0: float) -> float: ... def _exp_fn(arg0: float) -> float: ... @@ -12,9 +10,8 @@ def _sq_fn(arg0: float) -> float: ... class _BaseTransform: def forward(self, arg0: float) -> float: ... def inverse(self, arg0: float) -> float: ... - def __repr__(self) -> str: ... - def __copy__(self: T) -> T: ... - def __deepcopy__(self: T, memo: Any) -> T: ... + def __copy__(self) -> Self: ... + def __deepcopy__(self, memo: Any) -> Self: ... class id(_BaseTransform): ... class sqrt(_BaseTransform): ... diff --git a/src/boost_histogram/_core/hist.pyi b/src/boost_histogram/_core/hist.pyi index 0b0b45e3..7333e964 100644 --- a/src/boost_histogram/_core/hist.pyi +++ b/src/boost_histogram/_core/hist.pyi @@ -1,15 +1,12 @@ -from __future__ import annotations - -from typing import Any, ClassVar, Iterable, Iterator, Tuple, Type, TypeVar +from typing import Any, ClassVar import numpy as np from numpy.typing import ArrayLike +from typing_extensions import Self from . import accumulators, axis, storage from .axis import transform -T = TypeVar("T", bound="_BaseHistogram") - _axes_limit: int class _BaseHistogram: @@ -21,12 +18,11 @@ class _BaseHistogram: def rank(self) -> int: ... def size(self) -> int: ... def reset(self) -> None: ... - def __eq__(self, other: Any) -> bool: ... - def __ne__(self, other: Any) -> bool: ... - def __repr__(self) -> str: ... - def __copy__(self: T) -> T: ... - def __deepcopy__(self: T, memo: Any) -> T: ... - def __iadd__(self: T, other: _BaseHistogram) -> T: ... + def __eq__(self, other: object) -> bool: ... + def __ne__(self, other: object) -> bool: ... + def __copy__(self) -> Self: ... + def __deepcopy__(self, memo: Any) -> Self: ... + def __iadd__(self, other: _BaseHistogram) -> Self: ... def to_numpy(self, flow: bool = ...) -> tuple[np.typing.NDArray[Any], ...]: ... def view(self, flow: bool = ...) -> np.typing.NDArray[Any]: ... def axis(self, i: int = ...) -> axis._BaseAxis: ... @@ -37,42 +33,42 @@ class _BaseHistogram: sample: ArrayLike | None = ..., ) -> None: ... def empty(self, flow: bool = ...) -> bool: ... - def reduce(self: T, *args: Any) -> T: ... - def project(self: T, *args: int) -> T: ... + def reduce(self, *args: Any) -> Self: ... + def project(self, *args: int) -> Self: ... def sum(self, flow: bool = ...) -> Any: ... def at(self, *args: int) -> Any: ... class any_int64(_BaseHistogram): - def __idiv__(self: T, other: any_int64) -> T: ... - def __imul__(self: T, other: any_int64) -> T: ... + def __idiv__(self, other: any_int64) -> Self: ... + def __imul__(self, other: any_int64) -> Self: ... def at(self, *args: int) -> int: ... def _at_set(self, value: int, *args: int) -> None: ... def sum(self, flow: bool = ...) -> int: ... class any_unlimited(_BaseHistogram): - def __idiv__(self: T, other: any_unlimited) -> T: ... - def __imul__(self: T, other: any_unlimited) -> T: ... + def __idiv__(self, other: any_unlimited) -> Self: ... + def __imul__(self, other: any_unlimited) -> Self: ... def at(self, *args: int) -> float: ... def _at_set(self, value: float, *args: int) -> None: ... def sum(self, flow: bool = ...) -> float: ... class any_double(_BaseHistogram): - def __idiv__(self: T, other: any_double) -> T: ... - def __imul__(self: T, other: any_double) -> T: ... + def __idiv__(self, other: any_double) -> Self: ... + def __imul__(self, other: any_double) -> Self: ... def at(self, *args: int) -> float: ... def _at_set(self, value: float, *args: int) -> None: ... def sum(self, flow: bool = ...) -> float: ... class any_atomic_int64(_BaseHistogram): - def __idiv__(self: T, other: any_atomic_int64) -> T: ... - def __imul__(self: T, other: any_atomic_int64) -> T: ... + def __idiv__(self, other: any_atomic_int64) -> Self: ... + def __imul__(self, other: any_atomic_int64) -> Self: ... def at(self, *args: int) -> int: ... def _at_set(self, value: int, *args: int) -> None: ... def sum(self, flow: bool = ...) -> int: ... class any_weight(_BaseHistogram): - def __idiv__(self: T, other: any_weight) -> T: ... - def __imul__(self: T, other: any_weight) -> T: ... + def __idiv__(self, other: any_weight) -> Self: ... + def __imul__(self, other: any_weight) -> Self: ... def at(self, *args: int) -> accumulators.WeightedSum: ... def _at_set(self, value: accumulators.WeightedSum, *args: int) -> None: ... def sum(self, flow: bool = ...) -> accumulators.WeightedSum: ... diff --git a/src/boost_histogram/_core/storage.pyi b/src/boost_histogram/_core/storage.pyi index b8dca76c..7153e45f 100644 --- a/src/boost_histogram/_core/storage.pyi +++ b/src/boost_histogram/_core/storage.pyi @@ -1,16 +1,13 @@ -from __future__ import annotations +from typing import Any -from typing import Any, TypeVar - -T = TypeVar("T", bound="_BaseStorage") +from typing_extensions import Self class _BaseStorage: def __init__(self) -> None: ... - def __eq__(self, other: Any) -> bool: ... - def __ne__(self, other: Any) -> bool: ... - def __repr__(self) -> str: ... - def __copy__(self: T) -> T: ... - def __deepcopy__(self: T, memo: Any) -> T: ... + def __eq__(self, other: object) -> bool: ... + def __ne__(self, other: object) -> bool: ... + def __copy__(self) -> Self: ... + def __deepcopy__(self, memo: Any) -> Self: ... class int64(_BaseStorage): ... class double(_BaseStorage): ... diff --git a/src/boost_histogram/axis/__init__.py b/src/boost_histogram/axis/__init__.py index dffe8704..49aa228e 100644 --- a/src/boost_histogram/axis/__init__.py +++ b/src/boost_histogram/axis/__init__.py @@ -166,10 +166,10 @@ def bin(self, index: float) -> int | str | tuple[float, float]: return self._ax.bin(index) # type: ignore[no-any-return] - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: return hasattr(other, "_ax") and self._ax == other._ax - def __ne__(self, other: Any) -> bool: + def __ne__(self, other: object) -> bool: return (not hasattr(other, "_ax")) or self._ax != other._ax @classmethod @@ -838,7 +838,7 @@ class AxesTuple(tuple): # type: ignore[type-arg] __slots__ = () _MGRIDOPTS: ClassVar[MGridOpts] = {"sparse": True, "indexing": "ij"} - def __init__(self, __iterable: Iterable[Axis]) -> None: + def __init__(self, /, _iterable: Iterable[Axis]) -> None: for item in self: if not isinstance(item, Axis): raise TypeError( diff --git a/src/boost_histogram/histogram.py b/src/boost_histogram/histogram.py index 571e15dd..c62fea68 100644 --- a/src/boost_histogram/histogram.py +++ b/src/boost_histogram/histogram.py @@ -415,10 +415,10 @@ def __array__( kwargs["copy"] = copy return np.asarray(self.view(False), dtype=dtype, **kwargs) # type: ignore[call-overload] - def __eq__(self, other: Any) -> bool: + def __eq__(self, other: object) -> bool: return hasattr(other, "_hist") and self._hist == other._hist - def __ne__(self, other: Any) -> bool: + def __ne__(self, other: object) -> bool: return (not hasattr(other, "_hist")) or self._hist != other._hist def __add__(self: H, other: Histogram | np.typing.NDArray[Any] | float) -> H: diff --git a/src/boost_histogram/storage.py b/src/boost_histogram/storage.py index 4e1edffd..387bd7a1 100644 --- a/src/boost_histogram/storage.py +++ b/src/boost_histogram/storage.py @@ -36,11 +36,13 @@ def __repr__(self) -> str: accumulator: ClassVar[ ( - type[int] - | type[float] - | type[accumulators.WeightedMean] - | type[accumulators.WeightedSum] - | type[accumulators.Mean] + type[ + int + | float + | accumulators.WeightedMean + | accumulators.WeightedSum + | accumulators.Mean + ] ) ] diff --git a/src/boost_histogram/version.pyi b/src/boost_histogram/version.pyi index 502a8eeb..c2ee2cab 100644 --- a/src/boost_histogram/version.pyi +++ b/src/boost_histogram/version.pyi @@ -1,3 +1 @@ -from __future__ import annotations - version: str diff --git a/src/boost_histogram/view.py b/src/boost_histogram/view.py index c4a8556b..c4ccde25 100644 --- a/src/boost_histogram/view.py +++ b/src/boost_histogram/view.py @@ -13,7 +13,7 @@ class View(np.ndarray): # type: ignore[type-arg] __slots__ = () _FIELDS: ClassVar[tuple[str, ...]] - _PARENT: type[WeightedSum] | type[WeightedMean] | type[Mean] + _PARENT: type[WeightedSum | WeightedMean | Mean] def __getitem__(self, ind: StrIndex) -> np.typing.NDArray[Any]: # type: ignore[override] sliced = super().__getitem__(ind) From 2b1105ef13a58f135ec7f11f114b6442275d848e Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Thu, 30 Jan 2025 10:37:23 -0500 Subject: [PATCH 13/16] chore: drop hack for docs Signed-off-by: Henry Schreiner --- docs/api/boost_histogram.rst | 4 ++-- noxfile.py | 15 --------------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/docs/api/boost_histogram.rst b/docs/api/boost_histogram.rst index 3423a5b6..4ee85400 100644 --- a/docs/api/boost_histogram.rst +++ b/docs/api/boost_histogram.rst @@ -1,8 +1,8 @@ boost\_histogram ================ -.. automodule:: boost_histogram._internal.hist - :members: Histogram +.. automodule:: boost_histogram + :members: :undoc-members: :show-inheritance: diff --git a/noxfile.py b/noxfile.py index 8d182091..92cf8b47 100644 --- a/noxfile.py +++ b/noxfile.py @@ -1,7 +1,6 @@ from __future__ import annotations import argparse -from pathlib import Path from typing import Any import nox @@ -98,20 +97,6 @@ def build_api_docs(session: nox.Session) -> None: "src/boost_histogram", ) - # add API docs of boost_histogram._internal.hist.Histogram after - # the generation step - with Path("docs/api/boost_histogram.rst").open("r+") as f: - lines = f.readlines() - for i in range(len(lines)): - if lines[i] == ".. automodule:: boost_histogram\n": - lines[i] = ".. automodule:: boost_histogram._internal.hist\n" - lines[i + 1] = " :members: Histogram\n" - break - - f.truncate(0) - f.seek(0) - f.writelines(lines) - @nox.session def lint(session: nox.Session) -> None: From 1eaf83339cb69fce87dc54e9e83b86cbaa18b715 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Thu, 30 Jan 2025 10:44:08 -0500 Subject: [PATCH 14/16] fix: import location Signed-off-by: Henry Schreiner --- src/boost_histogram/numpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/boost_histogram/numpy.py b/src/boost_histogram/numpy.py index e4b269b8..e8327d75 100644 --- a/src/boost_histogram/numpy.py +++ b/src/boost_histogram/numpy.py @@ -13,7 +13,7 @@ import numpy as np -from . import Histogram +from .histogram import Histogram from . import axis as _axis from . import storage as _storage From 5244f75dec14e891606236101f5238b435b5be07 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 30 Jan 2025 15:45:21 +0000 Subject: [PATCH 15/16] style: pre-commit fixes --- src/boost_histogram/numpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/boost_histogram/numpy.py b/src/boost_histogram/numpy.py index e8327d75..8e461718 100644 --- a/src/boost_histogram/numpy.py +++ b/src/boost_histogram/numpy.py @@ -13,9 +13,9 @@ import numpy as np -from .histogram import Histogram from . import axis as _axis from . import storage as _storage +from .histogram import Histogram __all__ = ("histogram", "histogram2d", "histogramdd") From 80038a22892036aebb866b0bef602c8bc58dc649 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Thu, 30 Jan 2025 11:31:18 -0500 Subject: [PATCH 16/16] fix: use notes for Python 3.11+ Signed-off-by: Henry Schreiner --- src/boost_histogram/histogram.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/boost_histogram/histogram.py b/src/boost_histogram/histogram.py index c62fea68..d803f09d 100644 --- a/src/boost_histogram/histogram.py +++ b/src/boost_histogram/histogram.py @@ -3,6 +3,7 @@ import collections.abc import copy import logging +import sys import threading import typing import warnings @@ -40,13 +41,16 @@ try: from . import _core except ImportError as err: - msg = str(err) - if "_core" not in msg: + if "_core" not in str(err): raise new_msg = "Did you forget to compile boost-histogram? Use CMake or Setuptools to build, see the readme." - total_msg = f"{msg}\n{new_msg}" + if sys.version_info >= (3, 11): + err.add_note(new_msg) + raise + + total_msg = f"{err}\n{new_msg}" new_exception = type(err)(new_msg, name=err.name, path=err.path) raise new_exception from err