Skip to content

Commit

Permalink
Merge branch 'main' of github.com:NCAS-CMS/cf-python
Browse files Browse the repository at this point in the history
  • Loading branch information
davidhassell committed Feb 27, 2024
2 parents 13abf44 + e5c13f3 commit 136ba10
Show file tree
Hide file tree
Showing 9 changed files with 367 additions and 1 deletion.
2 changes: 2 additions & 0 deletions Changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ version 3.17.0

**2024-??-??**

* New methods: `cf.Field.pad_missing` and `cf.Data.pad_missing`
(https://github.com/NCAS-CMS/cf-python/issues/717)
* Fix occasional bug when calculating UGRID cell areas when
non-spatial coordinates span the discrete axis
(https://github.com/NCAS-CMS/cf-python/issues/721)
Expand Down
17 changes: 16 additions & 1 deletion cf/aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -3920,7 +3920,22 @@ def _sort_indices(m, canonical_axes):
"""
canonical_axes = [m.id_to_axis[identity] for identity in canonical_axes]
sort_indices = tuple([m.sort_indices[axis] for axis in canonical_axes])
needs_sorting = sort_indices != (slice(None),) * len(sort_indices)

# Whether or not one or more of the axes needs sorting
needs_sorting = False
for sort_index in sort_indices:
# Note: sort_index can only be a slice object or a numpy array
# (see `_create_hash_and_first_values`)
if isinstance(sort_index, slice):
if sort_index != slice(None):
# sort_index is a slice other than slice(None)
needs_sorting = True
break
elif sort_index.size > 1:
# sort_index is an array of 2 or more integers
needs_sorting = True
break

return sort_indices, needs_sorting


Expand Down
103 changes: 103 additions & 0 deletions cf/data/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -2180,6 +2180,109 @@ def mean_of_upper_decile(

return d

@_inplace_enabled(default=False)
def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False):
    """Pad an axis with missing data.

    Exactly one of *pad_width* and *to_size* must be set.

    .. versionadded:: 3.17.0

    :Parameters:

        axis: `int`
            Select the axis for which the padding is to be
            applied. It must be a non-negative dimension
            position, i.e. in the range ``0 <= axis < ndim``;
            negative positions are not accepted.

            *Parameter example:*
              Pad the first axis: ``axis=0``.

            *Parameter example:*
              Pad the second axis: ``axis=1``.

        {{pad_width: sequence of `int`, optional}}

        {{to_size: `int`, optional}}

        {{inplace: `bool`, optional}}

    :Returns:

        `Data` or `None`
            The padded data, or `None` if the operation was
            in-place.

    **Examples**

    >>> d = cf.Data(np.arange(6).reshape(2, 3))
    >>> print(d.array)
    [[0 1 2]
     [3 4 5]]
    >>> e = d.pad_missing(1, (1, 2))
    >>> print(e.array)
    [[-- 0 1 2 -- --]
     [-- 3 4 5 -- --]]
    >>> f = e.pad_missing(0, (0, 1))
    >>> print(f.array)
    [[-- 0 1 2 -- --]
     [-- 3 4 5 -- --]
     [-- -- -- -- -- --]]
    >>> g = d.pad_missing(1, to_size=5)
    >>> print(g.array)
    [[0 1 2 -- --]
     [3 4 5 -- --]]

    """
    ndim = self.ndim
    if not 0 <= axis < ndim:
        raise ValueError(
            "'axis' must be a non-negative dimension position less "
            f"than {ndim}. Got {axis}"
        )

    if to_size is not None:
        # Set pad_width from to_size: all of the padding goes after
        # the existing values
        if pad_width is not None:
            raise ValueError("Can't set both 'pad_width' and 'to_size'")

        pad_width = (0, to_size - self.shape[axis])
    elif pad_width is None:
        raise ValueError("Must set either 'pad_width' or 'to_size'")

    pad_width = np.asarray(pad_width)

    # Signed and unsigned integer types are both acceptable
    if pad_width.shape != (2,) or pad_width.dtype.kind not in "iu":
        if to_size is not None:
            # pad_width was derived from to_size, so report the
            # parameter that the caller actually provided
            raise ValueError(
                f"'to_size' must be an integer. Got: {to_size!r}"
            )

        raise ValueError(
            "'pad_width' must be a sequence of two integers. "
            f"Got: {pad_width}"
        )

    # Convert to built-in integers so that numpy scalar types do not
    # leak into error messages nor into the dask padding specification
    pad_width = tuple(int(n) for n in pad_width)
    if any(n < 0 for n in pad_width):
        if to_size is not None:
            raise ValueError(
                f"'to_size' ({to_size}) must not be smaller than the "
                f"original axis size ({self.shape[axis]})"
            )

        raise ValueError(
            f"Can't set a negative number of pad values. Got: {pad_width}"
        )

    d = _inplace_enabled_define_and_cleanup(self)

    dx = d.to_dask_array()

    # Record the original mask before padding
    mask0 = da.ma.getmaskarray(dx)

    # Only the selected axis is padded
    pad = [(0, 0)] * dx.ndim
    pad[axis] = pad_width

    # Pad the data with zero. This will lose the original mask.
    dx = da.pad(dx, pad, mode="constant", constant_values=0)

    # Pad the mask with True, so that all of the new elements are
    # missing data
    mask = da.pad(mask0, pad, mode="constant", constant_values=True)

    # Set the mask
    dx = da.ma.masked_where(mask, dx)

    d._set_dask(dx)
    return d

@_inplace_enabled(default=False)
def percentile(
self,
Expand Down
8 changes: 8 additions & 0 deletions cf/docstring/docstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,14 @@
"{{weights auto: `bool`, optional}}": """auto: `bool`, optional
If True then return `False` if weights can't be found,
rather than raising an exception.""",
# pad_width
"{{pad_width: sequence of `int`, optional}}": """pad_width: sequence of `int`, optional
Number of values to pad before and after the edges of
the axis.""",
# to_size
"{{to_size: `int`, optional}}": """to_size: `int`, optional
Pad the axis after so that the new axis has the given
size.""",
# ----------------------------------------------------------------
# Method description substitutions (4 levels of indentation)
# ----------------------------------------------------------------
Expand Down
130 changes: 130 additions & 0 deletions cf/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -11953,6 +11953,136 @@ def halo(

return f

@_inplace_enabled(default=False)
def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False):
    """Pad an axis with missing data.

    The field's data and all metadata constructs that span the
    axis are padded.

    .. versionadded:: 3.17.0

    :Parameters:

        axis: `str` or `int`
            Select the domain axis which is to be padded, defined
            by that which would be selected by passing the given
            axis description to a call of the field construct's
            `domain_axis` method. For example, for a value of
            ``'X'``, the domain axis construct returned by
            ``f.domain_axis('X')`` is selected.

            The selected axis must be spanned by the field's data.

        {{pad_width: sequence of `int`, optional}}

        {{to_size: `int`, optional}}

        {{inplace: `bool`, optional}}

    :Returns:

        `Field` or `None`
            The padded field construct, or `None` if the operation
            was in-place.

    **Examples**

    >>> f = cf.example_field(6)
    >>> print(f)
    Field: precipitation_amount (ncvar%pr)
    --------------------------------------
    Data            : precipitation_amount(cf_role=timeseries_id(2), time(4))
    Dimension coords: time(4) = [2000-01-16 12:00:00, ..., 2000-04-15 00:00:00] gregorian
    Auxiliary coords: latitude(cf_role=timeseries_id(2)) = [25.0, 7.0] degrees_north
                    : longitude(cf_role=timeseries_id(2)) = [10.0, 40.0] degrees_east
                    : cf_role=timeseries_id(cf_role=timeseries_id(2)) = [x1, y2]
                    : altitude(cf_role=timeseries_id(2), 3, 4) = [[[1.0, ..., --]]] m
    Coord references: grid_mapping_name:latitude_longitude
    >>> print(f.array)
    [[1. 2. 3. 4.]
     [5. 6. 7. 8.]]
    >>> g = f.pad_missing('T', (0, 5))
    >>> print(g)
    Field: precipitation_amount (ncvar%pr)
    --------------------------------------
    Data            : precipitation_amount(cf_role=timeseries_id(2), time(9))
    Dimension coords: time(9) = [2000-01-16 12:00:00, ..., --] gregorian
    Auxiliary coords: latitude(cf_role=timeseries_id(2)) = [25.0, 7.0] degrees_north
                    : longitude(cf_role=timeseries_id(2)) = [10.0, 40.0] degrees_east
                    : cf_role=timeseries_id(cf_role=timeseries_id(2)) = [x1, y2]
                    : altitude(cf_role=timeseries_id(2), 3, 4) = [[[1.0, ..., --]]] m
    Coord references: grid_mapping_name:latitude_longitude
    >>> print(g.array)
    [[1.0 2.0 3.0 4.0 -- -- -- -- --]
     [5.0 6.0 7.0 8.0 -- -- -- -- --]]
    >>> h = g.pad_missing('cf_role=timeseries_id', (0, 1))
    >>> print(h)
    Field: precipitation_amount (ncvar%pr)
    --------------------------------------
    Data            : precipitation_amount(cf_role=timeseries_id(3), time(9))
    Dimension coords: time(9) = [2000-01-16 12:00:00, ..., --] gregorian
    Auxiliary coords: latitude(cf_role=timeseries_id(3)) = [25.0, 7.0, --] degrees_north
                    : longitude(cf_role=timeseries_id(3)) = [10.0, 40.0, --] degrees_east
                    : cf_role=timeseries_id(cf_role=timeseries_id(3)) = [x1, y2, --]
                    : altitude(cf_role=timeseries_id(3), 3, 4) = [[[1.0, ..., --]]] m
    Coord references: grid_mapping_name:latitude_longitude
    >>> print(h.array)
    [[1.0 2.0 3.0 4.0 -- -- -- -- --]
     [5.0 6.0 7.0 8.0 -- -- -- -- --]
     [ -- -- -- -- -- -- -- -- --]]

    >>> print(f.pad_missing('time', to_size=6))
    Field: precipitation_amount (ncvar%pr)
    --------------------------------------
    Data            : precipitation_amount(cf_role=timeseries_id(2), time(6))
    Dimension coords: time(6) = [2000-01-16 12:00:00, ..., --] gregorian
    Auxiliary coords: latitude(cf_role=timeseries_id(2)) = [25.0, 7.0] degrees_north
                    : longitude(cf_role=timeseries_id(2)) = [10.0, 40.0] degrees_east
                    : cf_role=timeseries_id(cf_role=timeseries_id(2)) = [x1, y2]
                    : altitude(cf_role=timeseries_id(2), 3, 4) = [[[1.0, ..., --]]] m
    Coord references: grid_mapping_name:latitude_longitude

    """
    f = _inplace_enabled_define_and_cleanup(self)

    try:
        parsed_axes = f._parse_axes(axis)
    except ValueError as error:
        # Keep the original failure as the cause for easier debugging
        raise ValueError(
            f"Can't pad_missing: Bad axis specification: {axis!r}"
        ) from error

    # Exactly one axis must have been selected
    if len(parsed_axes) != 1:
        raise ValueError(
            f"Can't pad_missing: Bad axis specification: {axis!r}"
        )

    axis_key = parsed_axes[0]

    # Find the position of the selected axis in the field's data
    field_data_axes = f.get_data_axes()
    if axis_key not in field_data_axes:
        raise ValueError(
            f"Can't pad_missing: Axis {axis!r} is not spanned by the "
            "field's data"
        )

    iaxis = field_data_axes.index(axis_key)

    # Pad the field's own data
    super(Field, f).pad_missing(
        iaxis, pad_width=pad_width, to_size=to_size, inplace=True
    )

    # Set new domain axis size from the padded data
    f.domain_axis(axis_key).set_size(f.shape[iaxis])

    # Pad each metadata construct whose data spans the selected axis.
    # The position of the axis may differ from construct to
    # construct, so it is looked up for each one.
    construct_data_axes = f.constructs.data_axes()
    for key, construct in f.constructs.filter_by_data(todict=True).items():
        construct_axes = construct_data_axes[key]
        if axis_key not in construct_axes:
            continue

        # Pad the construct
        construct.pad_missing(
            construct_axes.index(axis_key),
            pad_width=pad_width,
            to_size=to_size,
            inplace=True,
        )

    return f

def percentile(
self,
ranks,
Expand Down
32 changes: 32 additions & 0 deletions cf/mixin/propertiesdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -1888,6 +1888,38 @@ def minimum(self):
"ERROR: Can't get the minimum when there is no data array"
)

@_inplace_enabled(default=False)
def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False):
    """Pad an axis with missing data.

    The padding is delegated to the ``pad_missing`` operation of
    the data, via `_apply_data_oper`.

    :Parameters:

        axis: `int`
            Select the axis for which the padding is to be
            applied.

        {{pad_width: sequence of `int`, optional}}

        {{to_size: `int`, optional}}

        {{inplace: `bool`, optional}}

    :Returns:

        `{{class}}` or `None`
            The {{class}} with padded data, or `None` if the
            operation was in-place.

    """
    # Keyword arguments that are forwarded to the data operation
    oper_kwargs = {
        "axis": axis,
        "pad_width": pad_width,
        "to_size": to_size,
        "inplace": inplace,
    }

    # The construct to be operated on: either this one or a copy,
    # as decided by the in-place machinery
    v = _inplace_enabled_define_and_cleanup(self)

    return self._apply_data_oper(v, "pad_missing", **oper_kwargs)

def period(self, *value, **config):
"""Return or set the period of the data.
Expand Down
34 changes: 34 additions & 0 deletions cf/mixin/propertiesdatabounds.py
Original file line number Diff line number Diff line change
Expand Up @@ -3874,6 +3874,40 @@ def inspect(self):
"""
print(cf_inspect(self)) # pragma: no cover

@_inplace_enabled(default=False)
def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False):
    """Pad an axis with missing data.

    The padding is delegated to `_apply_superclass_data_oper`,
    with both the ``bounds`` and ``interior_ring`` options
    enabled.

    :Parameters:

        axis: `int`
            Select the axis for which the padding is to be
            applied.

        {{pad_width: sequence of `int`, optional}}

        {{to_size: `int`, optional}}

        {{inplace: `bool`, optional}}

    :Returns:

        `{{class}}` or `None`
            The {{class}} with padded data, or `None` if the
            operation was in-place.

    """
    # Keyword arguments that are forwarded to the data operation
    oper_kwargs = {
        "axis": axis,
        "pad_width": pad_width,
        "to_size": to_size,
        "inplace": inplace,
    }

    # The construct to be operated on: either this one or a copy,
    # as decided by the in-place machinery
    c = _inplace_enabled_define_and_cleanup(self)

    return self._apply_superclass_data_oper(
        c,
        "pad_missing",
        bounds=True,
        interior_ring=True,
        **oper_kwargs,
    )

def period(self, *value, **config):
"""Return or set the period for cyclic values.
Expand Down
Loading

0 comments on commit 136ba10

Please sign in to comment.