Skip to content

Commit

Permalink
Add support for masked data (closes #20)
Browse files Browse the repository at this point in the history
  • Loading branch information
stephenworsley authored Dec 14, 2020
1 parent 4477af0 commit 77f2119
Show file tree
Hide file tree
Showing 7 changed files with 247 additions and 25 deletions.
50 changes: 25 additions & 25 deletions esmf_regrid/esmf_regridder.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,51 +266,51 @@ def __init__(self, src, tgt, precomputed_weights=None):
)
self.weight_matrix = precomputed_weights

def regrid(self, src_array, mdtol=1):
def regrid(self, src_array, norm_type="fracarea", mdtol=1):
"""
Perform regridding on an array of data.
Parameters
----------
src_array : array_like
A numpy array whose shape is compatible with self.src
mdtol : int, optional
norm_type : string
Either "fracarea" or "dstarea", defaults to "fracarea". Determines the
type of normalisation applied to the weights. Normalisations correspond
to ESMF constants ESMF.NormType.FRACAREA and ESMF.NormType.DSTAREA.
mdtol : float, optional
A number between 0 and 1 describing the missing data tolerance.
Depending on the value of mdtol, if an element in the target mesh/grid
is not sufficiently covered by elements of the source mesh/grid, then
the corresponding data point will be masked. An mdtol of 1 means that
only target elements which are completely uncovered will be masked,
an mdtol of 0 means that only target elements which are completely
covered will be unmasked and an mdtol of 0.5 means that target elements
whose area is at least half uncovered by source elements will be masked.
Depending on the value of `mdtol`, if a cell in the target grid is not
sufficiently covered by unmasked cells of the source grid, then it will
be masked. An `mdtol` of 1 means that only target cells which are not
covered at all will be masked, an `mdtol` of 0 means that all target
cells that are not entirely covered will be masked, and an `mdtol` of
0.5 means that all target cells that are less than half covered will
be masked.
Returns
-------
array_like
A numpy array whose shape is compatible with self.tgt.
"""
# TODO implement masked array handling similar to other iris regridders.

# A rudimentary filter is applied to mask data which is mapped from an
# insufficiently large source. This currently only accounts for discrepancies
# between the source and target grid/mesh geometries and does not account for
# masked data, though it ought to be possible to extend the functionality to
# handle masked data.
#
# Note that ESMPy is also able to handle masked data. It is worth investigating
# how this affects the mathematics and if it can be replicated after the fact
# using just the weights or if ESMF is doing something we want access to.
weight_sums = np.array(self.weight_matrix.sum(axis=1)).flatten()
src_inverted_mask = self.src._flatten_array(~ma.getmaskarray(src_array))
weight_sums = self.weight_matrix * src_inverted_mask
# Set the minimum mdtol to be slightly higher than 0 to account for rounding
# errors.
mdtol = max(mdtol, 1e-8)
tgt_mask = weight_sums >= 1 - mdtol
masked_weight_sums = weight_sums * tgt_mask.astype(int)
normalisations = np.where(masked_weight_sums == 0, 0, 1 / masked_weight_sums)
tgt_mask = weight_sums > 1 - mdtol
masked_weight_sums = weight_sums * tgt_mask
normalisations = np.ones(self.tgt.size())
if norm_type == "fracarea":
normalisations[tgt_mask] /= masked_weight_sums[tgt_mask]
elif norm_type == "dstarea":
pass
else:
raise ValueError(f'Normalisation type "{norm_type}" is not supported')
normalisations = ma.array(normalisations, mask=np.logical_not(tgt_mask))

flat_src = self.src._flatten_array(src_array)
flat_src = self.src._flatten_array(ma.filled(src_array, 0.0))
flat_tgt = self.weight_matrix * flat_src
flat_tgt = flat_tgt * normalisations
tgt_array = self.tgt._unflatten_array(flat_tgt)
Expand Down
32 changes: 32 additions & 0 deletions esmf_regrid/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import pathlib

import numpy as np


# Base directory of the test results.
_RESULT_PATH = pathlib.Path(__file__).parent.resolve() / "results"
Expand Down Expand Up @@ -37,3 +39,33 @@ def get_result_path(relative_path, unit=True):
result = _RESULT_PATH / relative_path

return result.resolve(strict=True)


def make_grid_args(nx, ny):
"""
Return arguments for a small grid.
Parameters
----------
nx : int
The number of cells spanned by the longitude.
ny : int
The number of cells spanned by the latutude
Returns
-------
Tuple
Arguments which can be passed to
:class:`~esmf_regrid.esmf_regridder.GridInfo.make_esmf_field`
"""
small_grid_lon = np.array(range(nx)) * 10 / nx
small_grid_lat = np.array(range(ny)) * 10 / ny

small_grid_lon_bounds = np.array(range(nx + 1)) * 10 / nx
small_grid_lat_bounds = np.array(range(ny + 1)) * 10 / ny
return (
small_grid_lon,
small_grid_lat,
small_grid_lon_bounds,
small_grid_lat_bounds,
)
1 change: 1 addition & 0 deletions esmf_regrid/tests/integration/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Integration tests for esmf_regrid."""
1 change: 1 addition & 0 deletions esmf_regrid/tests/integration/esmf_regridder/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Integration tests for :mod:`esmf_regrid.esmf_regridder`."""
67 changes: 67 additions & 0 deletions esmf_regrid/tests/integration/esmf_regridder/test_Regridder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""Unit tests for :class:`esmf_regrid.esmf_regridder.Regridder`."""

import ESMF
import numpy as np
from numpy import ma

from esmf_regrid.esmf_regridder import GridInfo, Regridder
from esmf_regrid.tests import make_grid_args


def test_esmpy_normalisation():
"""
Integration test for :meth:`~esmf_regrid.esmf_regridder.Regridder`.
Checks against ESMF to ensure results are consistent.
"""
src_data = np.array(
[
[1.0, 1.0, 1.0],
[1.0, 0.0, 0.0],
],
)
src_mask = np.array(
[
[True, False, False],
[False, False, False],
]
)
src_array = ma.array(src_data, mask=src_mask)

lon, lat, lon_bounds, lat_bounds = make_grid_args(2, 3)
src_grid = GridInfo(lon, lat, lon_bounds, lat_bounds)
src_esmpy_grid = src_grid._make_esmf_grid()
src_esmpy_grid.add_item(ESMF.GridItem.MASK, staggerloc=ESMF.StaggerLoc.CENTER)
src_esmpy_grid.mask[0][...] = src_mask.T
src_field = ESMF.Field(src_esmpy_grid)
src_field.data[...] = src_data.T

lon, lat, lon_bounds, lat_bounds = make_grid_args(3, 2)
tgt_grid = GridInfo(lon, lat, lon_bounds, lat_bounds)
tgt_field = tgt_grid.make_esmf_field()

regridder = Regridder(src_grid, tgt_grid)

regridding_kwargs = {
"ignore_degenerate": True,
"regrid_method": ESMF.RegridMethod.CONSERVE,
"unmapped_action": ESMF.UnmappedAction.IGNORE,
"factors": True,
"src_mask_values": [1],
}
esmpy_fracarea_regridder = ESMF.Regrid(
src_field, tgt_field, norm_type=ESMF.NormType.FRACAREA, **regridding_kwargs
)
esmpy_dstarea_regridder = ESMF.Regrid(
src_field, tgt_field, norm_type=ESMF.NormType.DSTAREA, **regridding_kwargs
)

tgt_field_dstarea = esmpy_dstarea_regridder(src_field, tgt_field)
result_esmpy_dstarea = tgt_field_dstarea.data
result_dstarea = regridder.regrid(src_array, norm_type="dstarea").T
assert ma.allclose(result_esmpy_dstarea, result_dstarea)

tgt_field_fracarea = esmpy_fracarea_regridder(src_field, tgt_field)
result_esmpy_fracarea = tgt_field_fracarea.data
result_fracarea = regridder.regrid(src_array, norm_type="fracarea").T
assert ma.allclose(result_esmpy_fracarea, result_fracarea)
1 change: 1 addition & 0 deletions esmf_regrid/tests/unit/esmf_regridder/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Unit tests for :mod:`esmf_regrid.esmf_regridder`."""
120 changes: 120 additions & 0 deletions esmf_regrid/tests/unit/esmf_regridder/test_Regridder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
"""Unit tests for :class:`esmf_regrid.esmf_regridder.Regridder`."""

import numpy as np
from numpy import ma
import pytest
import scipy.sparse

from esmf_regrid.esmf_regridder import GridInfo, Regridder
from esmf_regrid.tests import make_grid_args


def _expected_weights():
weight_list = np.array(
[
0.6674194025656819,
0.3325805974343169,
0.3351257294386341,
0.6648742705613656,
0.33363933739884066,
0.1663606626011589,
0.333639337398841,
0.1663606626011591,
0.16742273275056854,
0.33250863479149745,
0.16742273275056876,
0.33250863479149767,
0.6674194025656823,
0.3325805974343174,
0.3351257294386344,
0.6648742705613663,
]
)
rows = np.array([0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5])
columns = np.array([0, 1, 1, 2, 0, 1, 3, 4, 1, 2, 4, 5, 3, 4, 4, 5])

shape = (6, 6)

weights = scipy.sparse.csr_matrix((weight_list, (rows, columns)), shape=shape)
return weights


def test_Regridder_init():
"""Basic test for :meth:`~esmf_regrid.esmf_regridder.Regridder.__init__`."""
lon, lat, lon_bounds, lat_bounds = make_grid_args(2, 3)
src_grid = GridInfo(lon, lat, lon_bounds, lat_bounds)

lon, lat, lon_bounds, lat_bounds = make_grid_args(3, 2)
tgt_grid = GridInfo(lon, lat, lon_bounds, lat_bounds)

rg = Regridder(src_grid, tgt_grid)

result = rg.weight_matrix
expected = _expected_weights()

assert np.allclose(result.toarray(), expected.toarray())


def test_Regridder_regrid():
"""Basic test for :meth:`~esmf_regrid.esmf_regridder.Regridder.regrid`."""
lon, lat, lon_bounds, lat_bounds = make_grid_args(2, 3)
src_grid = GridInfo(lon, lat, lon_bounds, lat_bounds)

lon, lat, lon_bounds, lat_bounds = make_grid_args(3, 2)
tgt_grid = GridInfo(lon, lat, lon_bounds, lat_bounds)

# Set up the regridder with precomputed weights.
rg = Regridder(src_grid, tgt_grid, precomputed_weights=_expected_weights())

src_array = np.array([[1.0, 1.0, 1.0], [1.0, 0.0, 0.0]])
src_masked = ma.array(src_array, mask=[[1, 0, 0], [0, 0, 0]])

# Regrid with unmasked data.
result_nomask = rg.regrid(src_array)
expected_nomask = ma.array(
[
[1.0, 1.0],
[0.8336393373988409, 0.4999999999999997],
[0.6674194025656824, 0.0],
]
)
assert ma.allclose(result_nomask, expected_nomask)

# Regrid with an masked array with no masked points.
result_ma_nomask = rg.regrid(ma.array(src_array))
assert ma.allclose(result_ma_nomask, expected_nomask)

# Regrid with a fully masked array.
result_fullmask = rg.regrid(ma.array(src_array, mask=True))
expected_fulmask = ma.array(np.zeros([3, 2]), mask=True)
assert ma.allclose(result_fullmask, expected_fulmask)

# Regrid with a masked array containing a masked point.
result_withmask = rg.regrid(src_masked)
expected_withmask = ma.array(
[
[0.9999999999999999, 1.0],
[0.7503444126612077, 0.4999999999999997],
[0.6674194025656824, 0.0],
]
)
assert ma.allclose(result_withmask, expected_withmask)

# Regrid while setting mdtol.
result_half_mdtol = rg.regrid(src_masked, mdtol=0.5)
expected_half_mdtol = ma.array(expected_withmask, mask=[[1, 0], [0, 0], [1, 0]])
assert ma.allclose(result_half_mdtol, expected_half_mdtol)

# Regrid with norm_type="dstarea".
result_dstarea = rg.regrid(src_masked, norm_type="dstarea")
expected_dstarea = ma.array(
[
[0.3325805974343169, 0.9999999999999998],
[0.4999999999999999, 0.499931367542066],
[0.6674194025656823, 0.0],
]
)
assert ma.allclose(result_dstarea, expected_dstarea)

with pytest.raises(ValueError):
_ = rg.regrid(src_masked, norm_type="INVALID")

0 comments on commit 77f2119

Please sign in to comment.