Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New BoundsChecker implementation #409

Merged
merged 6 commits into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/source/api/core_api.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,14 @@ language_info:
:members:
```

## The {mod}`~pyrealm.core.bounds` submodule

```{eval-rst}
.. automodule:: pyrealm.core.bounds
:autosummary:
:members:
```

## The {mod}`~pyrealm.core.calendar` submodule

```{eval-rst}
Expand Down
169 changes: 169 additions & 0 deletions pyrealm/core/bounds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
"""Some functions in ``pyrealm`` are only well-behaved with given bounds but those
bounds are often a little imprecise and real world data can contain extreme values. As a
result, the bounds checking is deliberately not that intrusive: it warns when a variable
contains out-of-bounds values but leaves it up to the user to assess whether there is
a real problem and to adjust input data if needed.

The ``bounds`` module:

* Defines a {class}`~pyrealm.core.bounds.Bounds` dataclass used to define bounds for a
particular variable.
* Defines a {class}`~pyrealm.core.bounds.BoundsChecker` class with default bounds for
core variables that acts as a library for bounds checking.
* The main use case is e.g. ``BoundsChecker().check("tc", np.array([10, 1000]))``, which
will check that the alleged temperature data in °C fall within the configured bounds.

A ``BoundsChecker`` class instance is created with a predefined internal dictionary of
default variables and appropriate bounds. However, users can use the
{meth}`~pyrealm.core.bounds.BoundsChecker.update` method to override defaults or add new
variables by providing a new ``Bounds`` instance.

The {meth}`~pyrealm.core.bounds.BoundsChecker.check` method can then be used to validate
a set of values against the configured bounds for a given variable name. The ``check``
method returns the input variables, to allow values to be checked while being assigned
to an attribute.
""" # noqa: D205

from dataclasses import dataclass
from typing import Any, ClassVar
from warnings import warn

import numpy as np
from numpy.typing import NDArray


@dataclass
class Bounds:
    """Describe the sensible range of values for a single named variable."""

    var_name: str
    """The name of the variable, usually as it appears in function arguments."""
    lower: float
    """The lower limit of the sensible range."""
    upper: float
    """The upper limit of the sensible range."""
    interval_type: str
    """How the two limits are treated: one of '[]', '()', '[)' or '(]'."""
    unit: str
    """The units the values are expected to be in, given as a string."""

    def __post_init__(self) -> None:
        """Validate the interval type and the ordering of the two limits.

        Raises:
            ValueError: if the interval type is not a key of
                ``BoundsChecker._interval_types``, or if the lower limit is greater
                than or equal to the upper limit.
        """
        # BoundsChecker is defined further down the module. The name is only looked
        # up when an instance is created, by which point the class exists.
        recognised_types = BoundsChecker._interval_types
        if self.interval_type not in recognised_types:
            raise ValueError(f"Unknown interval type: {self.interval_type}")

        limits_misordered = self.lower >= self.upper
        if limits_misordered:
            raise ValueError(f"Bounds equal or reversed: {self.lower}, {self.upper}")


class BoundsChecker:
"""A bounds checker for input variables.

The class provides a library of {class}`~pyrealm.core.bounds.Bounds` instances for
core variables, keyed by the
{attr}`Bounds.var_name<pyrealm.core.bounds.Bounds.var_name>` attribute. The table is
populated from default values when a ``BoundsChecker`` instance is created but can
be updated and extended by assigning new ``Bounds`` instances to existing or new
variable name keys using the ``update`` method.

Values are validated with the ``check`` method, which issues a warning (rather than
raising an exception) when values fall outside the bounds stored for a variable name
and returns the input values unchanged.
"""

# TODO - think about these argument names - some unnecessarily terse.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What plan do you have for renaming these vars?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question - I think once the API structural changes have been pushed through, then review the function signatures for variable name and synchronise them across the code base. So a polishing step once we've put it all back together.

# Each row is (var_name, lower, upper, interval_type, unit). The order matches the
# field order of the Bounds dataclass, so a row can be unpacked as Bounds(*row).
# NOTE(review): several rows give the unit as "-" (for example "ppfd" and
# "mean_growth_temperature") - confirm these are really meant to be unitless.
_defaults: ClassVar[tuple[tuple[str, float, float, str, str], ...]] = (
("tc", -25, 80, "[]", "°C"),
("vpd", 0, 10000, "[]", "Pa"),
("co2", 0, 1000, "[]", "ppm"),
("patm", 30000, 110000, "[]", "Pa"),
("fapar", 0, 1, "[]", "-"),
("ppfd", 0, 3000, "[]", "-"),
("theta", 0, 0.8, "[]", "m3 m-3"),
("rootzonestress", 0, 1, "[]", "-"),
("aridity_index", 0, 50, "[]", "-"),
("mean_growth_temperature", 0, 50, "[]", "-"),
("rh", 0, 1, "[]", "-"),
("lat", -90, 90, "[]", "°"),
("sf", 0, 1, "[]", "-"),
("pn", 0, 1000, "[]", "mm day-1"),
("kWm", 0, 1e4, "[]", "mm"),
)
"""Default bounds data for core forcing variables."""

# Maps each interval type string to a (lower test, upper test) pair of ufuncs. The
# check method calls them as lower_test(values, lower) and upper_test(values, upper),
# so a True result means a value lies on the acceptable side of that bound. Square
# brackets use the inclusive comparisons, round brackets the strict ones.
_interval_types: ClassVar[dict[str, tuple[np.ufunc, np.ufunc]]] = {
"()": (np.greater, np.less),
"[]": (np.greater_equal, np.less_equal),
"(]": (np.greater, np.less_equal),
"[)": (np.greater_equal, np.less),
}
"""Dictionary of numpy function pairs for testing interval types."""

def __init__(self, *args: Any, **kwargs: Any) -> None:
    """Create a checker populated with the default bounds.

    Any positional or keyword arguments are passed straight through to the parent
    class initialiser.
    """
    super().__init__(*args, **kwargs)

    # Build one Bounds instance per default row, then key the table by the variable
    # name carried on each instance.
    default_bounds = (Bounds(*row) for row in self._defaults)
    self._data: dict[str, Bounds] = {
        entry.var_name: entry for entry in default_bounds
    }

def update(self, bounds: Bounds) -> None:
    """Register a ``Bounds`` instance with the checker.

    The entry is stored under the
    {attr}`Bounds.var_name<pyrealm.core.bounds.Bounds.var_name>` attribute of the
    provided instance: an existing entry with that name is replaced, otherwise a
    new entry is added.

    Args:
        bounds: A Bounds instance.
    """

    key = bounds.var_name
    self._data[key] = bounds

def check(self, var_name: str, values: NDArray) -> NDArray:
    r"""Check inputs fall within bounds.

    This method checks whether the provided values fall within the bounds specified
    for the given variable name and issues a warning when this is not the case. If
    the ``BoundsChecker`` instance has not been configured for the variable name
    then a warning will be given about the lack of bounds checking. The method
    returns the input values, so that the method can be used as a pass through
    validator for assigning attributes.

    Warnings are issued with ``stacklevel=2`` so that they are attributed to the
    code calling ``check`` rather than to this method.

    Args:
        var_name: The variable name
        values: An np.ndarray

    Returns:
        The input values.

    Examples:
        >>> vals = np.array([-15, 20, 30, 124], dtype=float)
        >>> bounds_checker = BoundsChecker()
        >>> bounds_checker.check("tc", vals)
        array([-15.,  20.,  30., 124.])
    """

    var_bounds = self._data.get(var_name)

    if var_bounds is None:
        warn(
            f"Variable '{var_name}' is not configured in the bounds checker. "
            "No bounds checking performed.",
            stacklevel=2,
        )
        return values

    # Get the interval functions
    lower_func, upper_func = self._interval_types[var_bounds.interval_type]

    # Do the input values contain out of bound values? An in-bounds value passes
    # both tests, so XOR is False for it. With lower < upper a value cannot fail
    # both tests, so XOR is True exactly when one side fails.
    out_of_bounds = np.logical_xor(
        lower_func(values, var_bounds.lower),
        upper_func(values, var_bounds.upper),
    )

    if np.any(out_of_bounds):
        warn(
            f"Variable '{var_name}' ({var_bounds.unit}) contains values outside "
            f"the expected range ({var_bounds.lower},{var_bounds.upper}). "
            "Check units?",
            stacklevel=2,
        )

    return values
7 changes: 5 additions & 2 deletions pyrealm/core/hygro.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from numpy.typing import NDArray

from pyrealm.constants import CoreConst
from pyrealm.core.utilities import bounds_checker, evaluate_horner_polynomial
from pyrealm.core.bounds import BoundsChecker
from pyrealm.core.utilities import evaluate_horner_polynomial


def calc_vp_sat(
Expand Down Expand Up @@ -93,6 +94,7 @@ def convert_rh_to_vpd(
rh: NDArray[np.float64],
ta: NDArray[np.float64],
core_const: CoreConst = CoreConst(),
bounds_checker: BoundsChecker = BoundsChecker(),
) -> NDArray[np.float64]:
"""Convert relative humidity to vapour pressure deficit.

Expand All @@ -101,6 +103,7 @@ def convert_rh_to_vpd(
ta: The air temperature in °C
core_const: An instance of :class:`~pyrealm.constants.core_const.CoreConst`
giving the settings to be used in conversions.
bounds_checker: A BoundsChecker instance used to validate inputs.

Returns:
The vapour pressure deficit in kPa
Expand All @@ -122,7 +125,7 @@ def convert_rh_to_vpd(
array([-171.1823864])
"""

rh = bounds_checker(rh, 0, 1, "[]", "rh", "proportion")
rh = bounds_checker.check("rh", rh)

vp_sat = calc_vp_sat(ta, core_const=core_const)

Expand Down
Loading