From e3f53d0d8d740f1a7ffeed821550d07513b31d1a Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 20 Feb 2024 08:41:53 +0000 Subject: [PATCH 1/6] dev --- cf/data/data.py | 51 +++++++++++++++++++++++ cf/field.py | 69 ++++++++++++++++++++++++++++++++ cf/mixin/fielddomain.py | 2 +- cf/mixin/propertiesdata.py | 19 +++++++++ cf/mixin/propertiesdatabounds.py | 22 ++++++++++ 5 files changed, 162 insertions(+), 1 deletion(-) diff --git a/cf/data/data.py b/cf/data/data.py index a881eefe22..d42e600dff 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -2180,6 +2180,56 @@ def mean_of_upper_decile( return d + @_inplace_enabled(default=False) + def pad_missing(self, axis, pad_width, inplace=False): + """TODO + + :Parameters: + + pad_width: `int` or sequence of array_like + Number of values padded before and after the edges of + the axis. ``(pad, pad)``, ``(pad,)`` and `int` yield + the same before and after pad for each edge. + + """ + d = _inplace_enabled_define_and_cleanup(self) + dx = d.to_dask_array() + mask0 = da.ma.getmaskarray(dx) + shape0 = dx.shape + + try: + pad_width0, pad_width1 = pad_width + except TypeError: + try: + pad_width0= pad_width1 = pad_width[0] + except TypeError: + pad_width0= pad_width1 = pad_width + + pad_width = [(0, 0)] * dx.ndim + pad_width[axis] = (pad_width0, pad_width1 ) + + dx = da.pad(dx, pad_width, mode='constant', constant_values=0) + mask = da.pad(mask0, pad_width, mode='constant', constant_values=0) + + index = [slice(None)] * dx.ndim + start = pad_width0 + stop = start + shape0[axis] + index[axis] = slice(start, stop) + mask[tuple(index)] = mask0 + + if pad_width0 > 0: + index[axis] = slice(0, start) + mask[tuple(index)] = True + + if pad_width1 > 0: + index[axis] = slice(stop, None) + mask[tuple(index)] = True + + dx = da.ma.masked_where(mask,dx) + + d._set_dask(dx) + return d + @_inplace_enabled(default=False) def percentile( self, @@ -2192,6 +2242,7 @@ def percentile( interpolation=None, interpolation2=None, ): + """Compute 
percentiles of the data along the specified axes. The default is to compute the percentiles along a flattened diff --git a/cf/field.py b/cf/field.py index 19e40b2535..fca8be7c9f 100644 --- a/cf/field.py +++ b/cf/field.py @@ -11953,6 +11953,75 @@ def halo( return f + @_inplace_enabled(default=False) + def pad_missing(self, axis, pad_width, + inplace=False): + """TODO + + :Parameters: + + axis: `str` or `int` + Select the domain axis which is to be padded, defined + by that which would be selected by passing the given + axis description to a call of the field construct's + `domain_axis` method. For example, for a value of + ``'X'``, the domain axis construct returned by + ``f.domain_axis('X')`` is selected. + + pad_width: (sequence of) `int` + Number of values padded before and after the edges of + the axis. ``(pad, pad)``, ``(pad,)`` and ``pad`` yield + the same before and after pad for each edge. + + {{pad_width: (sequence of) `int`}} + + {{inplace: `bool`, optional}} + + :Returns: + + `Field` or `None` + The padded field construct, or `None` if the operation + was in-place. 
+ + """ + f = _inplace_enabled_define_and_cleanup(self) + + try: + axis1 = f._parse_axes(axis) + except ValueError: + raise ValueError( + f"Can't pad_missing: Bad axis specification: {axis!r}" + ) + + if len(axis1) != 1: + raise ValueError( + f"Can't pad_missing: Bad axis specification: {axis!r}" + ) + + data_axes = f.get_data_axes() + axis = axis1[0] + iaxis = data_axes.index(axis) + + # Pad the field + super(Field, f).pad_missing(iaxis, pad_width, inplace=True) + + # Set new domain axis size + domain_axis =f.domain_axis(axis) + domain_axis.set_size(f.shape[iaxis]) + + data_axes = f.constructs.data_axes() + for key, construct in f.constructs.filter_by_data().items(): + construct_axes = data_axes[key] + + if axis not in construct_axes: + continue + + # Pad the construct + iaxis = construct_axes.index(axis) + construct.pad_missing(iaxis, pad_width, inplace=True) + + return f + def percentile( self, ranks, diff --git a/cf/mixin/fielddomain.py b/cf/mixin/fielddomain.py index 638f9aa855..f55f8cc32c 100644 --- a/cf/mixin/fielddomain.py +++ b/cf/mixin/fielddomain.py @@ -2150,7 +2150,7 @@ def _parse_axes(self, axes): axes = (axes,) return [self.domain_axis(x, key=True) for x in axes] - + def replace_construct( self, *identity, new=None, copy=True, **filter_kwargs ): diff --git a/cf/mixin/propertiesdata.py b/cf/mixin/propertiesdata.py index de0706251b..c5df4d2707 100644 --- a/cf/mixin/propertiesdata.py +++ b/cf/mixin/propertiesdata.py @@ -1888,6 +1888,25 @@ def minimum(self): "ERROR: Can't get the minimum when there is no data array" ) + @_inplace_enabled(default=False) + def pad_missing(self, axis, pad_width, inplace=False): + """TODO + + :Parameters: + + pad_width: `int` or sequence of array_like + Number of values padded before and after the edges of + the axis. ``(pad, pad)``, ``(pad,)`` and `int` yield + the same before and after pad for each edge. 
+ + """ + return self._apply_data_oper( + _inplace_enabled_define_and_cleanup(self), + "pad_missing", + axis=axis, pad_width=pad_width, + inplace=inplace, + ) + def period(self, *value, **config): """Return or set the period of the data. diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index 4f854a4b6d..8407338d2b 100644 --- a/cf/mixin/propertiesdatabounds.py +++ b/cf/mixin/propertiesdatabounds.py @@ -3874,6 +3874,28 @@ def inspect(self): """ print(cf_inspect(self)) # pragma: no cover + @_inplace_enabled(default=False) + def pad_missing(self, axis, pad_width, + inplace=False): + """TODO + + :Parameters: + + pad_width: (sequence of) `int` + Number of values padded before and after the edges of + the axis. ``(pad, pad)``, ``(pad,)`` and ``pad`` yield + the same before and after pad for each edge. + + """ + return self._apply_superclass_data_oper( + _inplace_enabled_define_and_cleanup(self), + "pad_missing", + bounds=True, + interior_ring=True, + axis=axis, pad_width=pad_width, + inplace=inplace, + ) + def period(self, *value, **config): """Return or set the period for cyclic values. 
From b4b4475bc9381e614f1b4f441e30d782bdf7cc3c Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 20 Feb 2024 22:25:18 +0000 Subject: [PATCH 2/6] dev --- Changelog.rst | 6 ++ cf/data/data.py | 79 ++++++++++++++------------- cf/docstring/docstring.py | 4 ++ cf/field.py | 94 +++++++++++++++++++++++--------- cf/mixin/fielddomain.py | 2 +- cf/mixin/propertiesdata.py | 22 ++++++-- cf/mixin/propertiesdatabounds.py | 25 ++++++--- cf/test/test_read_write.py | 1 + 8 files changed, 153 insertions(+), 80 deletions(-) diff --git a/Changelog.rst b/Changelog.rst index 918ba2a6d2..a9ac7496ba 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -3,6 +3,12 @@ version 3.17.0 **2024-??-??** +* Added spherical regridding to discrete sampling geometry destination + grids (https://github.com/NCAS-CMS/cf-python/issues/716) +* Added 3-d spherical regridding to `cf.Field.regrids`, and the option + to regrid the vertical axis in logarithmic coordinates to + `cf.Field.regrids` and `cf.Field.regridc` + (https://github.com/NCAS-CMS/cf-python/issues/715) * Added the ``cell_measures`` and ``coordinates`` keyword arguments to `cf.Field.weights` (https://github.com/NCAS-CMS/cf-python/issues/709) diff --git a/cf/data/data.py b/cf/data/data.py index d42e600dff..c9309cecae 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -2182,54 +2182,56 @@ def mean_of_upper_decile( @_inplace_enabled(default=False) def pad_missing(self, axis, pad_width, inplace=False): - """TODO + """Pad an axis with missing data. :Parameters: - pad_width: `int` or sequence of array_like - Number of values padded before and after the edges of - the axis. ``(pad, pad)``, ``(pad,)`` and `int` yield - the same before and after pad for each edge. + axis: `int` + Select the axis for which the padding is to be + applied. + + *Parameter example:* + Pad second axis: ``axis=1``. + + *Parameter example:* + Pad the last axis: ``axis=-1``. 
+ + {{pad_width: sequence of `int`}} + + {{inplace: `bool`, optional}} + + :Returns: + + `Data` or `None` + The padded data, or `None` if the operation was + in-place. """ + try: + pad_width0, pad_width1 = pad_width + except (TypeError, ValueError): + raise ValueError("'pad_width' must be a sequence of two integers") + d = _inplace_enabled_define_and_cleanup(self) + dx = d.to_dask_array() mask0 = da.ma.getmaskarray(dx) - shape0 = dx.shape - - try: - pad_width0, pad_width1 = pad_width - except TypeError: - try: - pad_width0= pad_width1 = pad_width[0] - except TypeError: - pad_width0= pad_width1 = pad_width - - pad_width = [(0, 0)] * dx.ndim - pad_width[axis] = (pad_width0, pad_width1 ) - - dx = da.pad(dx, pad_width, mode='constant', constant_values=0) - mask = da.pad(mask0, pad_width, mode='constant', constant_values=0) - - index = [slice(None)] * dx.ndim - start = pad_width0 - stop = start + shape0[axis] - index[axis] = slice(start, stop) - mask[tuple(index)] = mask0 - - if pad_width0 > 0: - index[axis] = slice(0, start) - mask[tuple(index)] = True - - if pad_width1 > 0: - index[axis] = slice(stop, None) - mask[tuple(index)] = True - - dx = da.ma.masked_where(mask,dx) - + + pad = [(0, 0)] * dx.ndim + pad[axis] = pad_width + + # Pad the data with zero. This will lose the original mask. + dx = da.pad(dx, pad, mode="constant", constant_values=0) + + # Pad the mask with True + mask = da.pad(mask0, pad, mode="constant", constant_values=True) + + # Set the mask + dx = da.ma.masked_where(mask, dx) + d._set_dask(dx) return d - + @_inplace_enabled(default=False) def percentile( self, @@ -2242,7 +2244,6 @@ def percentile( interpolation=None, interpolation2=None, ): - """Compute percentiles of the data along the specified axes. 
The default is to compute the percentiles along a flattened diff --git a/cf/docstring/docstring.py b/cf/docstring/docstring.py index 172ed469e4..9dc8f69789 100644 --- a/cf/docstring/docstring.py +++ b/cf/docstring/docstring.py @@ -587,6 +587,10 @@ "{{weights auto: `bool`, optional}}": """auto: `bool`, optional If True then return `False` if weights can't be found, rather than raising an exception.""", + # pad_width + "{{pad_width: sequence of `int`}}": """pad_width: sequence of `int` + Number of values to pad before and after the edges of + the axis.""", # ---------------------------------------------------------------- # Method description substitutions (4 levels of indentation) # ---------------------------------------------------------------- diff --git a/cf/field.py b/cf/field.py index fca8be7c9f..7b8bf3eb00 100644 --- a/cf/field.py +++ b/cf/field.py @@ -11954,34 +11954,77 @@ def halo( return f @_inplace_enabled(default=False) - def pad_missing(self, axis, pad_width, - inplace=False): - """TODO + def pad_missing(self, axis, pad_width, inplace=False): + """Pad an axis with missing data. + + The field's data and all metadata constructs that span the + axis are padded. :Parameters: - axis: `str` or `int` - Select the domain axis which is to be padded, defined - by that which would be selected by passing the given - axis description to a call of the field construct's - `domain_axis` method. For example, for a value of - ``'X'``, the domain axis construct returned by - ``f.domain_axis('X')`` is selected. + axis: `str` or `int` + Select the domain axis which is to be padded, defined + by that which would be selected by passing the given + axis description to a call of the field construct's + `domain_axis` method. For example, for a value of + ``'X'``, the domain axis construct returned by + ``f.domain_axis('X')`` is selected. - pad_width: (sequence of) `int` - Number of values padded before and after the edges of - the axis. 
``(pad, pad)``, ``(pad,)`` and ``pad`` yield - the same before and after pad for each edge. + {{pad_width: sequence of `int`}} - {{pad_width: (sequence of) `int`}} + {{inplace: `bool`, optional}} - {{inplace: `bool`, optional}} + :Returns: - :Returns: + `Field` or `None` + The padded field construct, or `None` if the operation + was in-place. - `Field` or `None` - The padded field construct, or `None` if the operation - was in-place. + **Examples* + + >>> f = cf.example_field(6) + >>> print(f) + Field: precipitation_amount (ncvar%pr) + -------------------------------------- + Data : precipitation_amount(cf_role=timeseries_id(2), time(4)) + Dimension coords: time(4) = [2000-01-16 12:00:00, ..., 2000-04-15 00:00:00] gregorian + Auxiliary coords: latitude(cf_role=timeseries_id(2)) = [25.0, 7.0] degrees_north + : longitude(cf_role=timeseries_id(2)) = [10.0, 40.0] degrees_east + : cf_role=timeseries_id(cf_role=timeseries_id(2)) = [x1, y2] + : altitude(cf_role=timeseries_id(2), 3, 4) = [[[1.0, ..., --]]] m + Coord references: grid_mapping_name:latitude_longitude + >>> print(f.array) + [[1. 2. 3. 4.] + [5. 6. 7. 
8.]] + >>> g = f.pad_missing('T', (0, 5)) + >>> print(g) + Field: precipitation_amount (ncvar%pr) + -------------------------------------- + Data : precipitation_amount(cf_role=timeseries_id(2), time(9)) + Dimension coords: time(9) = [2000-01-16 12:00:00, ..., --] gregorian + Auxiliary coords: latitude(cf_role=timeseries_id(2)) = [25.0, 7.0] degrees_north + : longitude(cf_role=timeseries_id(2)) = [10.0, 40.0] degrees_east + : cf_role=timeseries_id(cf_role=timeseries_id(2)) = [x1, y2] + : altitude(cf_role=timeseries_id(2), 3, 4) = [[[1.0, ..., --]]] m + Coord references: grid_mapping_name:latitude_longitude + >>> print(g.array) + [[1.0 2.0 3.0 4.0 -- -- -- -- --] + [5.0 6.0 7.0 8.0 -- -- -- -- --]] + >>> h = g.pad_missing('cf_role=timeseries_id', (0, 1)) + >>> print(h) + Field: precipitation_amount (ncvar%pr) + -------------------------------------- + Data : precipitation_amount(cf_role=timeseries_id(3), time(9)) + Dimension coords: time(9) = [2000-01-16 12:00:00, ..., --] gregorian + Auxiliary coords: latitude(cf_role=timeseries_id(3)) = [25.0, 7.0, --] degrees_north + : longitude(cf_role=timeseries_id(3)) = [10.0, 40.0, --] degrees_east + : cf_role=timeseries_id(cf_role=timeseries_id(3)) = [x1, y2, --] + : altitude(cf_role=timeseries_id(3), 3, 4) = [[[1.0, ..., --]]] m + Coord references: grid_mapping_name:latitude_longitude + >>> print(h.array) + [[1.0 2.0 3.0 4.0 -- -- -- -- --] + [5.0 6.0 7.0 8.0 -- -- -- -- --] + [ -- -- -- -- -- -- -- -- --]] """ f = _inplace_enabled_define_and_cleanup(self) @@ -11997,8 +12040,8 @@ def pad_missing(self, axis, pad_width, raise ValueError( f"Can't pad_missing: Bad axis specification: {axis!r}" ) - - data_axes = f.get_data_axes() + + data_axes = f.get_data_axes() axis = axis1[0] iaxis = data_axes.index(axis) @@ -12006,13 +12049,12 @@ def pad_missing(self, axis, pad_width, super(Field, f).pad_missing(iaxis, pad_width, inplace=True) # Set new domain axis size - domain_axis =f.domain_axis(axis) + domain_axis = f.domain_axis(axis) 
domain_axis.set_size(f.shape[iaxis]) data_axes = f.constructs.data_axes() - for key, construct in f.constructs.filter_by_data().items(): + for key, construct in f.constructs.filter_by_data(todict=True).items(): construct_axes = data_axes[key] - if axis not in construct_axes: continue @@ -12021,7 +12063,7 @@ def pad_missing(self, axis, pad_width, construct.pad_missing(iaxis, pad_width, inplace=True) return f - + def percentile( self, ranks, diff --git a/cf/mixin/fielddomain.py b/cf/mixin/fielddomain.py index f55f8cc32c..638f9aa855 100644 --- a/cf/mixin/fielddomain.py +++ b/cf/mixin/fielddomain.py @@ -2150,7 +2150,7 @@ def _parse_axes(self, axes): axes = (axes,) return [self.domain_axis(x, key=True) for x in axes] - + def replace_construct( self, *identity, new=None, copy=True, **filter_kwargs ): diff --git a/cf/mixin/propertiesdata.py b/cf/mixin/propertiesdata.py index c5df4d2707..8b310c215f 100644 --- a/cf/mixin/propertiesdata.py +++ b/cf/mixin/propertiesdata.py @@ -1890,20 +1890,30 @@ def minimum(self): @_inplace_enabled(default=False) def pad_missing(self, axis, pad_width, inplace=False): - """TODO + """Pad an axis with missing data. :Parameters: - pad_width: `int` or sequence of array_like - Number of values padded before and after the edges of - the axis. ``(pad, pad)``, ``(pad,)`` and `int` yield - the same before and after pad for each edge. + axis: `int` + Select the axis for which the padding is to be + applied. + + {{pad_width: sequence of `int`}} + + {{inplace: `bool`, optional}} + + :Returns: + + `{{class}]` or `None` + The {{class}} with padded data, or `None` if the + operation was in-place. 
""" return self._apply_data_oper( _inplace_enabled_define_and_cleanup(self), "pad_missing", - axis=axis, pad_width=pad_width, + axis=axis, + pad_width=pad_width, inplace=inplace, ) diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index 8407338d2b..b960667ec1 100644 --- a/cf/mixin/propertiesdatabounds.py +++ b/cf/mixin/propertiesdatabounds.py @@ -3875,16 +3875,24 @@ def inspect(self): print(cf_inspect(self)) # pragma: no cover @_inplace_enabled(default=False) - def pad_missing(self, axis, pad_width, - inplace=False): - """TODO + def pad_missing(self, axis, pad_width, inplace=False): + """Pad an axis with missing data. :Parameters: - pad_width: (sequence of) `int` - Number of values padded before and after the edges of - the axis. ``(pad, pad)``, ``(pad,)`` and ``pad`` yield - the same before and after pad for each edge. + axis: `int` + Select the axis for which the padding is to be + applied. + + {{pad_width: sequence of `int`}} + + {{inplace: `bool`, optional}} + + :Returns: + + `{{class}]` or `None` + The {{class}} with padded data, or `None` if the + operation was in-place. 
""" return self._apply_superclass_data_oper( @@ -3892,7 +3900,8 @@ def pad_missing(self, axis, pad_width, "pad_missing", bounds=True, interior_ring=True, - axis=axis, pad_width=pad_width, + axis=axis, + pad_width=pad_width, inplace=inplace, ) diff --git a/cf/test/test_read_write.py b/cf/test/test_read_write.py index 0eefa1b2ac..85fa23936f 100644 --- a/cf/test/test_read_write.py +++ b/cf/test/test_read_write.py @@ -920,6 +920,7 @@ def test_write_omit_data(self): self.assertFalse(g.array.count()) self.assertTrue(g.construct("grid_latitude").array.count()) + @unittest.skipUnless(False, "Target file currently unavailable") def test_read_url(self): """Test reading urls.""" for scheme in ("http", "https"): From e59e773ad0dc986b947fcd9f11a93449347b4fcb Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 21 Feb 2024 09:46:33 +0000 Subject: [PATCH 3/6] dev --- Changelog.rst | 8 ++------ cf/aggregate.py | 10 +++++++++- cf/data/data.py | 16 ++++++++++++++++ cf/mixin/propertiesdata.py | 2 +- cf/mixin/propertiesdatabounds.py | 2 +- cf/test/test_Data.py | 13 +++++++++++++ cf/test/test_Field.py | 13 +++++++++++++ cf/test/test_read_write.py | 1 - 8 files changed, 55 insertions(+), 10 deletions(-) diff --git a/Changelog.rst b/Changelog.rst index a9ac7496ba..cf860aa5bb 100644 --- a/Changelog.rst +++ b/Changelog.rst @@ -3,12 +3,8 @@ version 3.17.0 **2024-??-??** -* Added spherical regridding to discrete sampling geometry destination - grids (https://github.com/NCAS-CMS/cf-python/issues/716) -* Added 3-d spherical regridding to `cf.Field.regrids`, and the option - to regrid the vertical axis in logarithmic coordinates to - `cf.Field.regrids` and `cf.Field.regridc` - (https://github.com/NCAS-CMS/cf-python/issues/715) +* New methods: `cf.Field.pad_missing` and `cf.Data.pad_missing` + (https://github.com/NCAS-CMS/cf-python/issues/717) * Added the ``cell_measures`` and ``coordinates`` keyword arguments to `cf.Field.weights` (https://github.com/NCAS-CMS/cf-python/issues/709) diff --git 
a/cf/aggregate.py b/cf/aggregate.py index 6fe53c2c9f..f215e4fc06 100644 --- a/cf/aggregate.py +++ b/cf/aggregate.py @@ -3920,7 +3920,15 @@ def _sort_indices(m, canonical_axes): """ canonical_axes = [m.id_to_axis[identity] for identity in canonical_axes] sort_indices = tuple([m.sort_indices[axis] for axis in canonical_axes]) - needs_sorting = sort_indices != (slice(None),) * len(sort_indices) + try: + needs_sorting = sort_indices != (slice(None),) * len(sort_indices) + except ValueError: + # An element of 'sort_indices' is a numpy array with two or + # more elements, resulting in an "ValueError: The truth value + # of an array with more than one element is ambiguous. Use + # a.any() or a.all()" exception. + needs_sorting = True + return sort_indices, needs_sorting diff --git a/cf/data/data.py b/cf/data/data.py index c9309cecae..82a5e68e4e 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -2206,6 +2206,22 @@ def pad_missing(self, axis, pad_width, inplace=False): The padded data, or `None` if the operation was in-place. + **Examples** + + >>> d = cf.Data(np.arange(6).reshape(2, 3)) + >>> print(d.array) + [[0 1 2] + [3 4 5]] + >>> e = d.pad_missing(1, (1, 2)) + >>> print(e.array) + [[-- 0 1 2 -- --] + [-- 3 4 5 -- --]] + >>> f = e.pad_missing(0, (0, 1)) + >>> print(f.array) + [[-- 0 1 2 -- --] + [-- 3 4 5 -- --] + [-- -- -- -- -- --]] + """ try: pad_width0, pad_width1 = pad_width diff --git a/cf/mixin/propertiesdata.py b/cf/mixin/propertiesdata.py index 8b310c215f..8427d6b1be 100644 --- a/cf/mixin/propertiesdata.py +++ b/cf/mixin/propertiesdata.py @@ -1904,7 +1904,7 @@ def pad_missing(self, axis, pad_width, inplace=False): :Returns: - `{{class}]` or `None` + `{{class}}` or `None` The {{class}} with padded data, or `None` if the operation was in-place. 
diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index b960667ec1..621f54dd98 100644 --- a/cf/mixin/propertiesdatabounds.py +++ b/cf/mixin/propertiesdatabounds.py @@ -3890,7 +3890,7 @@ def pad_missing(self, axis, pad_width, inplace=False): :Returns: - `{{class}]` or `None` + `{{class}}` or `None` The {{class}} with padded data, or `None` if the operation was in-place. diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 2d439ba695..62d3336996 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -4731,6 +4731,19 @@ def test_Data_sparse_array(self): with self.assertRaises(ValueError): cf.Data(s, mask=mask) + def test_Data_pad_missing(self): + """Test Data.pad_missing.""" + d = cf.Data(np.arange(6).reshape(2, 3)) + + self.assertIsNone(d.pad_missing(1, (1, 2), inplace=True)) + self.assertEqual(d.shape, (2, 6)) + self.assertTrue(d[:, 0].mask.all()) + self.assertTrue(d[:, 4:].mask.all()) + + e = d.pad_missing(0, (0, 1)) + self.assertEqual(e.shape, (3, 6)) + self.assertTrue(e[2, :].mask.all()) + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) diff --git a/cf/test/test_Field.py b/cf/test/test_Field.py index b253484d72..3a952976b6 100644 --- a/cf/test/test_Field.py +++ b/cf/test/test_Field.py @@ -2658,6 +2658,19 @@ def test_Field_file_location(self): f.del_file_location("/invalid") self.assertEqual(f.file_locations(), set((location,))) + def test_Data_pad_missing(self): + """Test Field.pad_missing.""" + f = cf.example_field(0) + + self.assertIsNone(f.pad_missing("X", (1, 2), inplace=True)) + self.assertEqual(f.shape, (5, 11)) + self.assertTrue(f[:, 0].mask.all()) + self.assertTrue(f[:, 9:].mask.all()) + + g = f.pad_missing("Y", (0, 1)) + self.assertEqual(g.shape, (6, 11)) + self.assertTrue(g[5, :].mask.all()) + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) diff --git a/cf/test/test_read_write.py b/cf/test/test_read_write.py index 85fa23936f..0eefa1b2ac 100644 --- 
a/cf/test/test_read_write.py +++ b/cf/test/test_read_write.py @@ -920,7 +920,6 @@ def test_write_omit_data(self): self.assertFalse(g.array.count()) self.assertTrue(g.construct("grid_latitude").array.count()) - @unittest.skipUnless(False, "Target file currently unavailable") def test_read_url(self): """Test reading urls.""" for scheme in ("http", "https"): From 04b6ee689ade76c453aed3586523c37671bb74f0 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Wed, 21 Feb 2024 10:19:00 +0000 Subject: [PATCH 4/6] versionadded --- cf/field.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cf/field.py b/cf/field.py index 7b8bf3eb00..15ad5e68b6 100644 --- a/cf/field.py +++ b/cf/field.py @@ -11960,6 +11960,8 @@ def pad_missing(self, axis, pad_width, inplace=False): The field's data and all metadata constructs that span the axis are padded. + .. versionadded:: 3.17.0 + :Parameters: axis: `str` or `int` From 57fe9424ccd05517ee66817891aca98d2b5ab891 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Fri, 23 Feb 2024 17:43:19 +0000 Subject: [PATCH 5/6] better 'needs_sorting' tests --- cf/aggregate.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/cf/aggregate.py b/cf/aggregate.py index f215e4fc06..f95814da38 100644 --- a/cf/aggregate.py +++ b/cf/aggregate.py @@ -3920,14 +3920,21 @@ def _sort_indices(m, canonical_axes): """ canonical_axes = [m.id_to_axis[identity] for identity in canonical_axes] sort_indices = tuple([m.sort_indices[axis] for axis in canonical_axes]) - try: - needs_sorting = sort_indices != (slice(None),) * len(sort_indices) - except ValueError: - # An element of 'sort_indices' is a numpy array with two or - # more elements, resulting in an "ValueError: The truth value - # of an array with more than one element is ambiguous. Use - # a.any() or a.all()" exception. 
- needs_sorting = True + + # Whether or not one or more of the axes needs sorting + needs_sorting = False + for sort_index in sort_indices: + # Note: sort_index can only be a slice object or a numpy array + # (see `_create_hash_and_first_values`) + if isinstance(sort_index, slice): + if sort_index != slice(None): + # sort_index is a slice other than slice(None) + needs_sorting = True + break + elif sort_index.size > 1: + # sort_index is an array of 2 or more integers + needs_sorting = True + break return sort_indices, needs_sorting From 54c72a5a48453f6f87ca859e5e2dc14eec745ba9 Mon Sep 17 00:00:00 2001 From: David Hassell Date: Tue, 27 Feb 2024 11:56:37 +0000 Subject: [PATCH 6/6] Add 'to_size' keyword parameter --- cf/data/data.py | 47 ++++++++++++++++++++++++++++---- cf/docstring/docstring.py | 6 +++- cf/field.py | 27 ++++++++++++++---- cf/mixin/propertiesdata.py | 7 +++-- cf/mixin/propertiesdatabounds.py | 7 +++-- cf/test/test_Data.py | 16 +++++++++-- cf/test/test_Field.py | 10 +++++-- 7 files changed, 99 insertions(+), 21 deletions(-) diff --git a/cf/data/data.py b/cf/data/data.py index 82a5e68e4e..9c730b72d7 100644 --- a/cf/data/data.py +++ b/cf/data/data.py @@ -2181,7 +2181,7 @@ def mean_of_upper_decile( return d @_inplace_enabled(default=False) - def pad_missing(self, axis, pad_width, inplace=False): + def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False): """Pad an axis with missing data. :Parameters: @@ -2196,7 +2196,9 @@ def pad_missing(self, axis, pad_width, inplace=False): *Parameter example:* Pad the last axis: ``axis=-1``. 
- {{pad_width: sequence of `int`}} + {{pad_width: sequence of `int`, optional}} + + {{to_size: `int`, optional}} {{inplace: `bool`, optional}} @@ -2222,11 +2224,44 @@ def pad_missing(self, axis, pad_width, inplace=False): [-- 3 4 5 -- --] [-- -- -- -- -- --]] + >>> g = d.pad_missing(1, to_size=5) + >>> print(g.array) + [[0 1 2 -- --] + [3 4 5 -- --]] + """ - try: - pad_width0, pad_width1 = pad_width - except (TypeError, ValueError): - raise ValueError("'pad_width' must be a sequence of two integers") + if not -self.ndim <= axis < self.ndim: + raise ValueError( + f"'axis' must be a valid dimension position. Got {axis}" + ) + + if to_size is not None: + # Set pad_width from to_size + if pad_width is not None: + raise ValueError("Can't set both 'pad_width' and 'to_size'") + + pad_width = (0, to_size - self.shape[axis]) + elif pad_width is None: + raise ValueError("Must set either 'pad_width' or 'to_size'") + + pad_width = np.asarray(pad_width) + if pad_width.shape != (2,) or not pad_width.dtype.kind == "i": + raise ValueError( + "'pad_width' must be a sequence of two integers. " + f"Got: {pad_width}" + ) + + pad_width = tuple(pad_width) + if any(n < 0 for n in pad_width): + if to_size is not None: + raise ValueError( + f"'to_size' ({to_size}) must not be smaller than the " + f"original axis size ({self.shape[axis]})" + ) + + raise ValueError( + f"Can't set a negative number of pad values. 
Got: {pad_width}" + ) d = _inplace_enabled_define_and_cleanup(self) diff --git a/cf/docstring/docstring.py b/cf/docstring/docstring.py index 9dc8f69789..28a7360df4 100644 --- a/cf/docstring/docstring.py +++ b/cf/docstring/docstring.py @@ -588,9 +588,13 @@ If True then return `False` if weights can't be found, rather than raising an exception.""", # pad_width - "{{pad_width: sequence of `int`}}": """pad_width: sequence of `int` + "{{pad_width: sequence of `int`, optional}}": """pad_width: sequence of `int`, optional Number of values to pad before and after the edges of the axis.""", + # to_size + "{{to_size: `int`, optional}}": """to_size: `int`, optional + Pad the axis after so that the new axis has the given + size.""", # ---------------------------------------------------------------- # Method description substitutions (4 levels of indentation) # ---------------------------------------------------------------- diff --git a/cf/field.py b/cf/field.py index e429cb8166..9ce8d88c6d 100644 --- a/cf/field.py +++ b/cf/field.py @@ -11954,7 +11954,7 @@ def halo( return f @_inplace_enabled(default=False) - def pad_missing(self, axis, pad_width, inplace=False): + def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False): """Pad an axis with missing data. The field's data and all metadata constructs that span the @@ -11972,7 +11972,9 @@ def pad_missing(self, axis, pad_width, inplace=False): ``'X'``, the domain axis construct returned by ``f.domain_axis('X')`` is selected. 
- {{pad_width: sequence of `int`}} + {{pad_width: sequence of `int`, optional}} + + {{to_size: `int`, optional}} {{inplace: `bool`, optional}} @@ -11984,7 +11986,7 @@ def pad_missing(self, axis, pad_width, inplace=False): **Examples* - >>> f = cf.example_field(6) + >>> f = cf.example_field(6) >>> print(f) Field: precipitation_amount (ncvar%pr) -------------------------------------- @@ -12028,6 +12030,17 @@ def pad_missing(self, axis, pad_width, inplace=False): [5.0 6.0 7.0 8.0 -- -- -- -- --] [ -- -- -- -- -- -- -- -- --]] + >>> print(f.pad_missing('time', to_size=6)) + Field: precipitation_amount (ncvar%pr) + -------------------------------------- + Data : precipitation_amount(cf_role=timeseries_id(2), time(6)) + Dimension coords: time(6) = [2000-01-16 12:00:00, ..., --] gregorian + Auxiliary coords: latitude(cf_role=timeseries_id(2)) = [25.0, 7.0] degrees_north + : longitude(cf_role=timeseries_id(2)) = [10.0, 40.0] degrees_east + : cf_role=timeseries_id(cf_role=timeseries_id(2)) = [x1, y2] + : altitude(cf_role=timeseries_id(2), 3, 4) = [[[1.0, ..., --]]] m + Coord references: grid_mapping_name:latitude_longitude + """ f = _inplace_enabled_define_and_cleanup(self) @@ -12048,7 +12061,9 @@ def pad_missing(self, axis, pad_width, inplace=False): iaxis = data_axes.index(axis) # Pad the field - super(Field, f).pad_missing(iaxis, pad_width, inplace=True) + super(Field, f).pad_missing( + iaxis, pad_width=pad_width, to_size=to_size, inplace=True + ) # Set new domain axis size domain_axis = f.domain_axis(axis) @@ -12062,7 +12077,9 @@ def pad_missing(self, axis, pad_width, inplace=False): # Pad the construct iaxis = construct_axes.index(axis) - construct.pad_missing(iaxis, pad_width, inplace=True) + construct.pad_missing( + iaxis, pad_width=pad_width, to_size=to_size, inplace=True + ) return f diff --git a/cf/mixin/propertiesdata.py b/cf/mixin/propertiesdata.py index 8427d6b1be..80269e11e5 100644 --- a/cf/mixin/propertiesdata.py +++ b/cf/mixin/propertiesdata.py @@ -1889,7 
+1889,7 @@ def minimum(self): ) @_inplace_enabled(default=False) - def pad_missing(self, axis, pad_width, inplace=False): + def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False): """Pad an axis with missing data. :Parameters: @@ -1898,7 +1898,9 @@ def pad_missing(self, axis, pad_width, inplace=False): Select the axis for which the padding is to be applied. - {{pad_width: sequence of `int`}} + {{pad_width: sequence of `int`, optional}} + + {{to_size: `int`, optional}} {{inplace: `bool`, optional}} @@ -1914,6 +1916,7 @@ def pad_missing(self, axis, pad_width, inplace=False): "pad_missing", axis=axis, pad_width=pad_width, + to_size=to_size, inplace=inplace, ) diff --git a/cf/mixin/propertiesdatabounds.py b/cf/mixin/propertiesdatabounds.py index 621f54dd98..15434785ce 100644 --- a/cf/mixin/propertiesdatabounds.py +++ b/cf/mixin/propertiesdatabounds.py @@ -3875,7 +3875,7 @@ def inspect(self): print(cf_inspect(self)) # pragma: no cover @_inplace_enabled(default=False) - def pad_missing(self, axis, pad_width, inplace=False): + def pad_missing(self, axis, pad_width=None, to_size=None, inplace=False): """Pad an axis with missing data. :Parameters: @@ -3884,7 +3884,9 @@ def pad_missing(self, axis, pad_width, inplace=False): Select the axis for which the padding is to be applied. 
- {{pad_width: sequence of `int`}} + {{pad_width: sequence of `int`, optional}} + + {{to_size: `int`, optional}} {{inplace: `bool`, optional}} @@ -3902,6 +3904,7 @@ def pad_missing(self, axis, pad_width, inplace=False): interior_ring=True, axis=axis, pad_width=pad_width, + to_size=to_size, inplace=inplace, ) diff --git a/cf/test/test_Data.py b/cf/test/test_Data.py index 62d3336996..53656208ef 100644 --- a/cf/test/test_Data.py +++ b/cf/test/test_Data.py @@ -4735,15 +4735,27 @@ def test_Data_pad_missing(self): """Test Data.pad_missing.""" d = cf.Data(np.arange(6).reshape(2, 3)) - self.assertIsNone(d.pad_missing(1, (1, 2), inplace=True)) + g = d.pad_missing(1, to_size=5) + self.assertEqual(g.shape, (2, 5)) + self.assertTrue(g[:, 3:].mask.all()) + + self.assertIsNone(d.pad_missing(1, pad_width=(1, 2), inplace=True)) self.assertEqual(d.shape, (2, 6)) self.assertTrue(d[:, 0].mask.all()) self.assertTrue(d[:, 4:].mask.all()) - e = d.pad_missing(0, (0, 1)) + e = d.pad_missing(0, pad_width=(0, 1)) self.assertEqual(e.shape, (3, 6)) self.assertTrue(e[2, :].mask.all()) + # Can't set both pad_width and to_size + with self.assertRaises(ValueError): + d.pad_missing(0, pad_width=(0, 1), to_size=99) + + # Axis out of bounds + with self.assertRaises(ValueError): + d.pad_missing(99, to_size=99) + if __name__ == "__main__": print("Run date:", datetime.datetime.now()) diff --git a/cf/test/test_Field.py b/cf/test/test_Field.py index e2e0236e33..6f528642fa 100644 --- a/cf/test/test_Field.py +++ b/cf/test/test_Field.py @@ -2669,16 +2669,20 @@ def test_Field_file_location(self): f.del_file_location("/invalid") self.assertEqual(f.file_locations(), set((location,))) - def test_Data_pad_missing(self): + def test_Field_pad_missing(self): """Test Field.pad_missing.""" f = cf.example_field(0) - self.assertIsNone(f.pad_missing("X", (1, 2), inplace=True)) + g = f.pad_missing("X", to_size=10) + self.assertEqual(g.shape, (5, 10)) + self.assertTrue(g[:, 8:].mask.all()) + + 
self.assertIsNone(f.pad_missing("X", pad_width=(1, 2), inplace=True)) self.assertEqual(f.shape, (5, 11)) self.assertTrue(f[:, 0].mask.all()) self.assertTrue(f[:, 9:].mask.all()) - g = f.pad_missing("Y", (0, 1)) + g = f.pad_missing("Y", pad_width=(0, 1)) self.assertEqual(g.shape, (6, 11)) self.assertTrue(g[5, :].mask.all())