diff --git a/docs/index.rst b/docs/index.rst index 270509a9..616cf39f 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -45,6 +45,7 @@ datasets `. - :doc:`using/subsetting` - :doc:`using/combining` - :doc:`using/selecting` +- :doc:`using/ensembles` - :doc:`using/grids` - :doc:`using/zip` - :doc:`using/statistics` @@ -65,6 +66,7 @@ datasets `. using/subsetting using/combining using/selecting + using/ensembles using/grids using/zip using/statistics diff --git a/docs/using/code/number1_.py b/docs/using/code/number1_.py new file mode 100644 index 00000000..089e8b57 --- /dev/null +++ b/docs/using/code/number1_.py @@ -0,0 +1,4 @@ +ds = open_dataset( + dataset, + number=1, +) diff --git a/docs/using/code/number2_.py b/docs/using/code/number2_.py new file mode 100644 index 00000000..3fd8d808 --- /dev/null +++ b/docs/using/code/number2_.py @@ -0,0 +1,4 @@ +ds = open_dataset( + dataset, + number=[1, 3, 5], +) diff --git a/docs/using/ensembles.rst b/docs/using/ensembles.rst new file mode 100644 index 00000000..ac109538 --- /dev/null +++ b/docs/using/ensembles.rst @@ -0,0 +1,27 @@ +.. _selecting-members: + +################### + Selecting members +################### + +This section describes how to subset data that are part of an ensemble. +To combine ensembles, see :ref:`ensembles` in the +:ref:`combining-datasets` section. + +.. _number: + +If a dataset is an ensemble, you can select one or more specific members +using the ``number`` option. You can also use ``numbers`` (which is an +alias for ``number``), and ``member`` (or ``members``). The difference +between the two is that ``number`` is **1-based**, while ``member`` is +**0-based**. + +Select a single element: + +.. literalinclude:: code/number1_.py + :language: python + +... or a list: + +..
literalinclude:: code/number2_.py + :language: python diff --git a/docs/using/selecting.rst b/docs/using/selecting.rst index ef21d9a3..cf3cf6d1 100644 --- a/docs/using/selecting.rst +++ b/docs/using/selecting.rst @@ -67,6 +67,28 @@ You can also rename variables: This will be useful when you join datasets and do not want variables from one dataset to override the ones from the other. +******** + number +******** + +If a dataset is an ensemble, you can select one or more specific members +using the ``number`` option. You can also use ``numbers`` (which is an +alias for ``number``), and ``member`` (or ``members``). The difference +between the two is that ``number`` is **1-based**, while ``member`` is +**0-based**. + +Select a single element: + +.. literalinclude:: code/number1_.py + :language: python + +... or a list: + +.. literalinclude:: code/number2_.py + :language: python + +.. _rescale: + ********* rescale ********* @@ -87,7 +109,9 @@ rescale the data. .. warning:: When providing units, the library assumes that the mapping between - them is a linear transformation. No check is does to ensure this is + them is a linear transformation. No check is done to ensure this is the case. .. _cfunits: https://github.com/NCAS-CMS/cfunits + +..
# Amount subtracted from a user-supplied value to obtain a zero-based member
# index: `number(s)` is one-based, `member(s)` is zero-based.
OFFSETS = dict(number=1, numbers=1, member=0, members=0)


class Number(Forwards):
    """Restrict a dataset to a subset of the entries along axis 2.

    The selection is given through the keyword arguments ``number`` /
    ``numbers`` (one-based) and/or ``member`` / ``members`` (zero-based).
    Each accepts a single value or a list/tuple of values. All values are
    converted to zero-based indices, de-duplicated and sorted, so the
    order in which they are given does not affect the result.

    Attributes set by the constructor:

    * ``members``: sorted list of the selected zero-based indices.
    * ``mask``: boolean array of length ``forward.shape[2]``, true at the
      selected indices.
    """

    def __init__(self, forward, **kwargs):
        """Build the selection.

        Parameters
        ----------
        forward
            The wrapped dataset; ``forward.shape[2]`` gives the number of
            available members.
        **kwargs
            Any of the keys of ``OFFSETS``; each value is a scalar or a
            list/tuple of values convertible with ``int``.

        Raises
        ------
        ValueError
            If a selected member falls outside ``[0, forward.shape[2])``
            once converted to a zero-based index.
        KeyError
            If a keyword is not one of the keys of ``OFFSETS``.
        """
        super().__init__(forward)

        count = forward.shape[2]

        selected = set()
        for key, values in kwargs.items():
            if not isinstance(values, (list, tuple)):
                values = [values]
            selected.update(int(v) - OFFSETS[key] for v in values)

        self.members = sorted(selected)
        for n in self.members:
            if not (0 <= n < count):
                raise ValueError(f"Member {n} is out of range. `number(s)` is one-based, `member(s)` is zero-based.")

        # Vectorised membership test; also yields an all-False mask when
        # nothing is selected.
        self.mask = np.isin(np.arange(count), self.members)
        self._shape, _ = update_tuple(forward.shape, 2, len(self.members))

    @property
    def shape(self):
        """Shape of the wrapped dataset with axis 2 reduced to the selection."""
        return self._shape

    def __getitem__(self, index):
        """Return the data at ``index``, keeping only the selected members.

        ``index`` may be an ``int``, a ``slice`` or anything accepted by
        ``index_to_slices``.
        """
        if isinstance(index, int):
            # A single item along the first axis drops that axis, so the
            # member axis is axis 1 of the three remaining ones.
            return self.forward[index][:, self.mask, :]

        if isinstance(index, slice):
            return self.forward[index][:, :, self.mask, :]

        index, changes = index_to_slices(index, self.shape)

        # The member part of the index refers to the *selected* members, so
        # it must not be handed to the wrapped dataset: fetch the complete
        # member axis, apply the mask, and only then apply the caller's
        # member index. Applying the full-length mask to an already reduced
        # axis would fail or pick the wrong members.
        # NOTE(review): assumes index_to_slices returns one entry per axis
        # of `self.shape` -- confirm against `.indexing`.
        index, member_index = update_tuple(index, 2, slice(None))
        result = self.forward[index]
        result = result[:, :, self.mask, :]
        result = result[:, :, member_index, :]
        return apply_index_to_slices_changes(result, changes)

    def tree(self):
        """Return the debug node for this dataset, listing one-based numbers."""
        return Node(self, [self.forward.tree()], numbers=[n + 1 for n in self.members])

    def metadata_specific(self):
        """Return the metadata specific to this selection (one-based numbers)."""
        return {
            "numbers": [n + 1 for n in self.members],
        }