From 87063560ef8f4594b8b447fe7e3d286519c866c1 Mon Sep 17 00:00:00 2001 From: Philip Chmielowiec <67855069+philipc2@users.noreply.github.com> Date: Wed, 28 Aug 2024 00:07:24 -0500 Subject: [PATCH 1/6] setters for grid properties, skeleton for chunk method --- uxarray/grid/grid.py | 178 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) diff --git a/uxarray/grid/grid.py b/uxarray/grid/grid.py index 8a0bce175..657fd9a99 100644 --- a/uxarray/grid/grid.py +++ b/uxarray/grid/grid.py @@ -572,6 +572,12 @@ def n_nodes_per_face(self) -> xr.DataArray: return self._ds["n_nodes_per_face"] + @n_nodes_per_face.setter + def n_nodes_per_face(self, value): + """Setter for ``n_nodes_per_face``""" + assert isinstance(value, xr.DataArray) + self._ds["n_nodes_per_face"] = value + @property def node_lon(self) -> xr.DataArray: """Longitude of each node in degrees. @@ -583,6 +589,12 @@ def node_lon(self) -> xr.DataArray: _populate_node_latlon(self) return self._ds["node_lon"] + @node_lon.setter + def node_lon(self, value): + """Setter for ``node_lon``""" + assert isinstance(value, xr.DataArray) + self._ds["node_lon"] = value + @property def node_lat(self) -> xr.DataArray: """Latitude of each node in degrees. @@ -594,6 +606,12 @@ def node_lat(self) -> xr.DataArray: _populate_node_latlon(self) return self._ds["node_lat"] + @node_lat.setter + def node_lat(self, value): + """Setter for ``node_lat``""" + assert isinstance(value, xr.DataArray) + self._ds["node_lat"] = value + @property def node_x(self) -> xr.DataArray: """Cartesian x location of each node in meters. @@ -605,6 +623,12 @@ def node_x(self) -> xr.DataArray: return self._ds["node_x"] + @node_x.setter + def node_x(self, value): + """Setter for ``node_x``""" + assert isinstance(value, xr.DataArray) + self._ds["node_x"] = value + @property def node_y(self) -> xr.DataArray: """Cartesian y location of each node in meters. 
@@ -615,6 +639,12 @@ def node_y(self) -> xr.DataArray: _populate_node_xyz(self) return self._ds["node_y"] + @node_y.setter + def node_y(self, value): + """Setter for ``node_y``""" + assert isinstance(value, xr.DataArray) + self._ds["node_y"] = value + @property def node_z(self) -> xr.DataArray: """Cartesian z location of each node in meters. @@ -625,6 +655,12 @@ def node_z(self) -> xr.DataArray: _populate_node_xyz(self) return self._ds["node_z"] + @node_z.setter + def node_z(self, value): + """Setter for ``node_z``""" + assert isinstance(value, xr.DataArray) + self._ds["node_z"] = value + @property def edge_lon(self) -> xr.DataArray: """Longitude of the center of each edge in degrees. @@ -637,6 +673,12 @@ def edge_lon(self) -> xr.DataArray: _set_desired_longitude_range(self._ds) return self._ds["edge_lon"] + @edge_lon.setter + def edge_lon(self, value): + """Setter for ``edge_lon``""" + assert isinstance(value, xr.DataArray) + self._ds["edge_lon"] = value + @property def edge_lat(self) -> xr.DataArray: """Latitude of the center of each edge in degrees. @@ -648,6 +690,12 @@ def edge_lat(self) -> xr.DataArray: _set_desired_longitude_range(self._ds) return self._ds["edge_lat"] + @edge_lat.setter + def edge_lat(self, value): + """Setter for ``edge_lat``""" + assert isinstance(value, xr.DataArray) + self._ds["edge_lat"] = value + @property def edge_x(self) -> xr.DataArray: """Cartesian x location of the center of each edge in meters. @@ -659,6 +707,12 @@ def edge_x(self) -> xr.DataArray: return self._ds["edge_x"] + @edge_x.setter + def edge_x(self, value): + """Setter for ``edge_x``""" + assert isinstance(value, xr.DataArray) + self._ds["edge_x"] = value + @property def edge_y(self) -> xr.DataArray: """Cartesian y location of the center of each edge in meters. 
@@ -669,6 +723,12 @@ def edge_y(self) -> xr.DataArray: _populate_edge_centroids(self) return self._ds["edge_y"] + @edge_y.setter + def edge_y(self, value): + """Setter for ``edge_y``""" + assert isinstance(value, xr.DataArray) + self._ds["edge_y"] = value + @property def edge_z(self) -> xr.DataArray: """Cartesian z location of the center of each edge in meters. @@ -679,6 +739,12 @@ def edge_z(self) -> xr.DataArray: _populate_edge_centroids(self) return self._ds["edge_z"] + @edge_z.setter + def edge_z(self, value): + """Setter for ``edge_z``""" + assert isinstance(value, xr.DataArray) + self._ds["edge_z"] = value + @property def face_lon(self) -> xr.DataArray: """Longitude of the center of each face in degrees. @@ -690,6 +756,12 @@ def face_lon(self) -> xr.DataArray: _set_desired_longitude_range(self._ds) return self._ds["face_lon"] + @face_lon.setter + def face_lon(self, value): + """Setter for ``face_lon``""" + assert isinstance(value, xr.DataArray) + self._ds["face_lon"] = value + @property def face_lat(self) -> xr.DataArray: """Latitude of the center of each face in degrees. @@ -702,6 +774,12 @@ def face_lat(self) -> xr.DataArray: return self._ds["face_lat"] + @face_lat.setter + def face_lat(self, value): + """Setter for ``face_lat``""" + assert isinstance(value, xr.DataArray) + self._ds["face_lat"] = value + @property def face_x(self) -> xr.DataArray: """Cartesian x location of the center of each face in meters. @@ -713,6 +791,12 @@ def face_x(self) -> xr.DataArray: return self._ds["face_x"] + @face_x.setter + def face_x(self, value): + """Setter for ``face_x``""" + assert isinstance(value, xr.DataArray) + self._ds["face_x"] = value + @property def face_y(self) -> xr.DataArray: """Cartesian y location of the center of each face in meters. 
@@ -723,6 +807,12 @@ def face_y(self) -> xr.DataArray: _populate_face_centroids(self) return self._ds["face_y"] + @face_y.setter + def face_y(self, value): + """Setter for ``face_y``""" + assert isinstance(value, xr.DataArray) + self._ds["face_y"] = value + @property def face_z(self) -> xr.DataArray: """Cartesian z location of the center of each face in meters. @@ -733,6 +823,12 @@ def face_z(self) -> xr.DataArray: _populate_face_centroids(self) return self._ds["face_z"] + @face_z.setter + def face_z(self, value): + """Setter for ``face_z``""" + assert isinstance(value, xr.DataArray) + self._ds["face_z"] = value + @property def face_node_connectivity(self) -> xr.DataArray: """Indices of the nodes that make up each face. @@ -755,6 +851,12 @@ def face_node_connectivity(self) -> xr.DataArray: return self._ds["face_node_connectivity"] + @face_node_connectivity.setter + def face_node_connectivity(self, value): + """Setter for ``face_node_connectivity``""" + assert isinstance(value, xr.DataArray) + self._ds["face_node_connectivity"] = value + @property def edge_node_connectivity(self) -> xr.DataArray: """Indices of the two nodes that make up each edge. 
@@ -768,6 +870,12 @@ def edge_node_connectivity(self) -> xr.DataArray: return self._ds["edge_node_connectivity"] + @edge_node_connectivity.setter + def edge_node_connectivity(self, value): + """Setter for ``edge_node_connectivity``""" + assert isinstance(value, xr.DataArray) + self._ds["edge_node_connectivity"] = value + @property def node_node_connectivity(self) -> xr.DataArray: """Indices of the nodes that surround each node.""" @@ -777,6 +885,12 @@ def node_node_connectivity(self) -> xr.DataArray: ) return self._ds["node_node_connectivity"] + @node_node_connectivity.setter + def node_node_connectivity(self, value): + """Setter for ``node_node_connectivity``""" + assert isinstance(value, xr.DataArray) + self._ds["node_node_connectivity"] = value + @property def face_edge_connectivity(self) -> xr.DataArray: """Indices of the edges that surround each face. @@ -788,6 +902,12 @@ def face_edge_connectivity(self) -> xr.DataArray: return self._ds["face_edge_connectivity"] + @face_edge_connectivity.setter + def face_edge_connectivity(self, value): + """Setter for ``face_edge_connectivity``""" + assert isinstance(value, xr.DataArray) + self._ds["face_edge_connectivity"] = value + @property def edge_edge_connectivity(self) -> xr.DataArray: """Indices of the edges that surround each edge. 
@@ -801,6 +921,12 @@ def edge_edge_connectivity(self) -> xr.DataArray: return self._ds["edge_edge_connectivity"] + @edge_edge_connectivity.setter + def edge_edge_connectivity(self, value): + """Setter for ``edge_edge_connectivity``""" + assert isinstance(value, xr.DataArray) + self._ds["edge_edge_connectivity"] = value + @property def node_edge_connectivity(self) -> xr.DataArray: """Indices of the edges that surround each node.""" @@ -811,6 +937,12 @@ def node_edge_connectivity(self) -> xr.DataArray: return self._ds["node_edge_connectivity"] + @node_edge_connectivity.setter + def node_edge_connectivity(self, value): + """Setter for ``node_edge_connectivity``""" + assert isinstance(value, xr.DataArray) + self._ds["node_edge_connectivity"] = value + @property def face_face_connectivity(self) -> xr.DataArray: """Indices of the faces that surround each face. @@ -822,6 +954,12 @@ def face_face_connectivity(self) -> xr.DataArray: return self._ds["face_face_connectivity"] + @face_face_connectivity.setter + def face_face_connectivity(self, value): + """Setter for ``face_face_connectivity``""" + assert isinstance(value, xr.DataArray) + self._ds["face_face_connectivity"] = value + @property def edge_face_connectivity(self) -> xr.DataArray: """Indices of the faces that saddle each edge. @@ -833,6 +971,12 @@ def edge_face_connectivity(self) -> xr.DataArray: return self._ds["edge_face_connectivity"] + @edge_face_connectivity.setter + def edge_face_connectivity(self, value): + """Setter for ``edge_face_connectivity``""" + assert isinstance(value, xr.DataArray) + self._ds["edge_face_connectivity"] = value + @property def node_face_connectivity(self) -> xr.DataArray: """Indices of the faces that surround each node. 
@@ -844,6 +988,12 @@ def node_face_connectivity(self) -> xr.DataArray: return self._ds["node_face_connectivity"] + @node_face_connectivity.setter + def node_face_connectivity(self, value): + """Setter for ``node_face_connectivity``""" + assert isinstance(value, xr.DataArray) + self._ds["node_face_connectivity"] = value + @property def edge_node_distances(self): """Distances between the two nodes that surround each edge. @@ -854,6 +1004,12 @@ def edge_node_distances(self): _populate_edge_node_distances(self) return self._ds["edge_node_distances"] + @edge_node_distances.setter + def edge_node_distances(self, value): + """Setter for ``edge_node_distances``""" + assert isinstance(value, xr.DataArray) + self._ds["edge_node_distances"] = value + @property def edge_face_distances(self): """Distances between the centers of the faces that saddle each edge. @@ -864,6 +1020,12 @@ def edge_face_distances(self): _populate_edge_face_distances(self) return self._ds["edge_face_distances"] + @edge_face_distances.setter + def edge_face_distances(self, value): + """Setter for ``edge_face_distances``""" + assert isinstance(value, xr.DataArray) + self._ds["edge_face_distances"] = value + @property def antimeridian_face_indices(self) -> np.ndarray: """Index of each face that crosses the antimeridian.""" @@ -883,6 +1045,12 @@ def face_areas(self) -> xr.DataArray: ) return self._ds["face_areas"] + @face_areas.setter + def face_areas(self, value): + """Setter for ``face_areas``""" + assert isinstance(value, xr.DataArray) + self._ds["face_areas"] = value + @property def bounds(self): """Latitude Longitude Bounds for each Face in degrees. 
@@ -897,6 +1065,12 @@ def bounds(self): return self._ds["bounds"] + @bounds.setter + def bounds(self, value): + """Setter for ``bounds``""" + assert isinstance(value, xr.DataArray) + self._ds["bounds"] = value + @property def face_jacobian(self): """Declare face_jacobian as a property.""" @@ -904,6 +1078,10 @@ def face_jacobian(self): _ = self.face_areas return self._face_jacobian + def chunk(self): + # TODO: add general chunking method + pass + def get_ball_tree( self, coordinates: Optional[str] = "nodes", From aef8477f18c9e6ca83114bc9216c09da80085b5a Mon Sep 17 00:00:00 2001 From: Philip Chmielowiec <67855069+philipc2@users.noreply.github.com> Date: Wed, 28 Aug 2024 00:21:21 -0500 Subject: [PATCH 2/6] work on chunk method for Grid --- uxarray/grid/grid.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/uxarray/grid/grid.py b/uxarray/grid/grid.py index 657fd9a99..a464ee586 100644 --- a/uxarray/grid/grid.py +++ b/uxarray/grid/grid.py @@ -1078,8 +1078,16 @@ def face_jacobian(self): _ = self.face_areas return self._face_jacobian - def chunk(self): - # TODO: add general chunking method + def chunk(self, n_node="auto", n_edge="auto", n_face="auto"): + """Converts all arrays to dask arrays with given chunks across grid + dimensions. + + Non-dask arrays will be converted to dask arrays. Dask arrays will be chunked to the given chunk size. 
+ + Parameters + ---------- + """ + pass def get_ball_tree( From 8d2f5500d8ea72556bac0f7bb71b88644318d1e1 Mon Sep 17 00:00:00 2001 From: Philip Chmielowiec <67855069+philipc2@users.noreply.github.com> Date: Wed, 28 Aug 2024 16:02:41 -0500 Subject: [PATCH 3/6] add dask tests, update grid chunk method --- test/test_dask.py | 42 ++++++++++++++++++++++++++++ uxarray/grid/grid.py | 65 ++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 test/test_dask.py diff --git a/test/test_dask.py b/test/test_dask.py new file mode 100644 index 000000000..cf0136dba --- /dev/null +++ b/test/test_dask.py @@ -0,0 +1,42 @@ +import uxarray as ux +import numpy as np +import dask.array as da + +import pytest +import os +from pathlib import Path + + + +current_path = Path(os.path.dirname(os.path.realpath(__file__))) + +mpas_grid = current_path / 'meshfiles' / "mpas" / "QU" / 'oQU480.231010.nc' + + +def test_grid_chunking(): + """Tests the chunking of an entire grid.""" + uxgrid = ux.open_grid(mpas_grid) + + for var in uxgrid._ds: + # variables should all be np.ndarray + assert isinstance(uxgrid._ds[var].data, np.ndarray) + + # chunk every data variable + uxgrid.chunk(n_node=1, n_face=2, n_edge=4) + + for var in uxgrid._ds: + # variables should all be da.Array + assert isinstance(uxgrid._ds[var].data, da.Array) + +def test_individual_var_chunking(): + """Tests the chunking of a single grid variable.""" + uxgrid = ux.open_grid(mpas_grid) + + # face_node_conn should originally be a numpy array + assert isinstance(uxgrid.face_node_connectivity.data, np.ndarray) + + # chunk face_node_connectivity + uxgrid.face_node_connectivity = uxgrid.face_node_connectivity.chunk(chunks={"n_face": 16}) + + # face_node_conn should now be a dask array + assert isinstance(uxgrid.face_node_connectivity.data, da.Array) diff --git a/uxarray/grid/grid.py b/uxarray/grid/grid.py index a464ee586..97b9f5d1f 100644 --- a/uxarray/grid/grid.py +++ 
b/uxarray/grid/grid.py @@ -496,6 +496,13 @@ def connectivity(self) -> set: return set([conn for conn in CONNECTIVITY_NAMES if conn in self._ds]) + @property + def descriptors(self) -> set: + """Names of all descriptor variables.""" + from uxarray.conventions.descriptors import DESCRIPTOR_NAMES + + return set([desc for desc in DESCRIPTOR_NAMES if desc in self._ds]) + @property def parsed_attrs(self) -> dict: """Dictionary of parsed attributes from the source grid.""" @@ -1080,15 +1087,69 @@ def face_jacobian(self): def chunk(self, n_node="auto", n_edge="auto", n_face="auto"): """Converts all arrays to dask arrays with given chunks across grid - dimensions. + dimensions in-place. Non-dask arrays will be converted to dask arrays. Dask arrays will be chunked to the given chunk size. Parameters ---------- + n_node : int, tuple + How to chunk node variables. Must be one of the following forms: + + - A blocksize like 1000. + - A blockshape like (1000, 1000). + - Explicit sizes of all blocks along all dimensions like + ((1000, 1000, 500), (400, 400)). + - A size in bytes, like "100 MiB" which will choose a uniform + block-like shape + - The word "auto" which acts like the above, but uses a configuration + value ``array.chunk-size`` for the chunk size + + -1 or None as a blocksize indicate the size of the corresponding + dimension. + + n_edge : int, tuple + How to chunk edge variables. Must be one of the following forms: + + - A blocksize like 1000. + - A blockshape like (1000, 1000). + - Explicit sizes of all blocks along all dimensions like + ((1000, 1000, 500), (400, 400)). + - A size in bytes, like "100 MiB" which will choose a uniform + block-like shape + - The word "auto" which acts like the above, but uses a configuration + value ``array.chunk-size`` for the chunk size + + -1 or None as a blocksize indicate the size of the corresponding + dimension. + + n_face : int, tuple + How to chunk face variables. Must be one of the following forms: + + - A blocksize like 1000. 
+ - A blockshape like (1000, 1000). + - Explicit sizes of all blocks along all dimensions like + ((1000, 1000, 500), (400, 400)). + - A size in bytes, like "100 MiB" which will choose a uniform + block-like shape + - The word "auto" which acts like the above, but uses a configuration + value ``array.chunk-size`` for the chunk size + + -1 or None as a blocksize indicate the size of the corresponding + dimension. """ - pass + grid_var_names = self.coordinates | self.connectivity | self.descriptors + + for var_name in grid_var_names: + grid_var = getattr(self, var_name) + + if "n_node" in grid_var.dims: + setattr(self, var_name, grid_var.chunk(chunks={"n_node": n_node})) + elif "n_edge" in grid_var.dims: + setattr(self, var_name, grid_var.chunk(chunks={"n_edge": n_edge})) + elif "n_face" in grid_var.dims: + setattr(self, var_name, grid_var.chunk(chunks={"n_face": n_face})) def get_ball_tree( self, From 01e8c997579fe55eaa13fceeba32fe06307805e7 Mon Sep 17 00:00:00 2001 From: Philip Chmielowiec <67855069+philipc2@users.noreply.github.com> Date: Tue, 3 Sep 2024 12:41:25 -0500 Subject: [PATCH 4/6] fix chunking for non-standard dimensions --- test/test_dask.py | 9 +++++++++ uxarray/core/api.py | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/test/test_dask.py b/test/test_dask.py index cf0136dba..f3b555869 100644 --- a/test/test_dask.py +++ b/test/test_dask.py @@ -12,6 +12,9 @@ mpas_grid = current_path / 'meshfiles' / "mpas" / "QU" / 'oQU480.231010.nc' +csne30_grid = current_path / 'meshfiles' / "ugrid" / "outCSne30" / 'outCSne30.ug' +csne30_data = current_path / 'meshfiles' / "ugrid" / "outCSne30" / 'outCSne30_var2.nc' + def test_grid_chunking(): """Tests the chunking of an entire grid.""" @@ -40,3 +43,9 @@ def test_individual_var_chunking(): # face_node_conn should now be a dask array assert isinstance(uxgrid.face_node_connectivity.data, da.Array) + + +def test_uxds_chunking(): + uxds = ux.open_dataset(csne30_grid, csne30_data, chunks={"n_face": 4}) + + pass 
diff --git a/uxarray/core/api.py b/uxarray/core/api.py index 283122660..052ead1a3 100644 --- a/uxarray/core/api.py +++ b/uxarray/core/api.py @@ -165,6 +165,15 @@ def open_dataset( grid_filename_or_obj, latlon=latlon, use_dual=use_dual, **grid_kwargs ) + if "chunks" in kwargs: + # correctly chunk standardized ugrid dimension names + + source_dims_dict = uxgrid._source_dims_dict + + for original_grid_dim, ugrid_grid_dim in source_dims_dict.items(): + if ugrid_grid_dim in kwargs["chunks"]: + kwargs["chunks"][original_grid_dim] = kwargs["chunks"][ugrid_grid_dim] + # UxDataset ds = xr.open_dataset(filename_or_obj, **kwargs) # type: ignore From aed223a7fa713127c525884530c261a87a520dfa Mon Sep 17 00:00:00 2001 From: Philip Chmielowiec <67855069+philipc2@users.noreply.github.com> Date: Tue, 3 Sep 2024 12:47:08 -0500 Subject: [PATCH 5/6] add chunking fix to open_mfdataset --- uxarray/core/api.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/uxarray/core/api.py b/uxarray/core/api.py index 052ead1a3..204b10e31 100644 --- a/uxarray/core/api.py +++ b/uxarray/core/api.py @@ -167,9 +167,7 @@ def open_dataset( if "chunks" in kwargs: # correctly chunk standardized ugrid dimension names - source_dims_dict = uxgrid._source_dims_dict - for original_grid_dim, ugrid_grid_dim in source_dims_dict.items(): if ugrid_grid_dim in kwargs["chunks"]: kwargs["chunks"][original_grid_dim] = kwargs["chunks"][ugrid_grid_dim] @@ -263,6 +261,13 @@ def open_mfdataset( grid_filename_or_obj, latlon=latlon, use_dual=use_dual, **grid_kwargs ) + if "chunks" in kwargs: + # correctly chunk standardized ugrid dimension names + source_dims_dict = uxgrid._source_dims_dict + for original_grid_dim, ugrid_grid_dim in source_dims_dict.items(): + if ugrid_grid_dim in kwargs["chunks"]: + kwargs["chunks"][original_grid_dim] = kwargs["chunks"][ugrid_grid_dim] + # UxDataset ds = xr.open_mfdataset(paths, **kwargs) # type: ignore From 44616016e6192c3cd02189276502a0e05cb96c5c Mon Sep 17 00:00:00 
2001 From: Philip Chmielowiec <67855069+philipc2@users.noreply.github.com> Date: Thu, 5 Sep 2024 16:38:03 -0500 Subject: [PATCH 6/6] add Grid.chunk to user api --- docs/user_api/index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/user_api/index.rst b/docs/user_api/index.rst index ac521c048..ec3278b6d 100644 --- a/docs/user_api/index.rst +++ b/docs/user_api/index.rst @@ -224,6 +224,7 @@ Methods Grid.get_kd_tree Grid.copy Grid.isel + Grid.chunk Dimensions