diff --git a/doc/source/conf.py b/doc/source/conf.py index 37df17a..98750a4 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -24,6 +24,8 @@ "sphinx.ext.doctest", "sphinx.ext.napoleon", "sphinx.ext.intersphinx", + "IPython.sphinxext.ipython_directive", + "IPython.sphinxext.ipython_console_highlighting", ] templates_path = ["_templates"] @@ -63,3 +65,12 @@ "python": ("https://docs.python.org/3/", None), "xarray": ("https://docs.xarray.dev/en/stable", None), } + +# Exclude link file +exclude_patterns = ["_build", "links.rst"] + +# make rst_epilog a variable, so you can add other epilog parts to it +rst_epilog = "" +# Read all link targets from file +with open("links.rst") as f: + rst_epilog += f.read() diff --git a/doc/source/index.rst b/doc/source/index.rst index 0ed213f..5c1c802 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -12,5 +12,6 @@ xarray-ms documentation readme install + tutorial api changelog diff --git a/doc/source/links.rst b/doc/source/links.rst new file mode 100644 index 0000000..be445f9 --- /dev/null +++ b/doc/source/links.rst @@ -0,0 +1,19 @@ +.. + This file should not be included in the standard project + but is included in the rst_epilog in conf.py + +.. _SKAO: https://www.skao.int/ +.. _NRAO: https://public.nrao.edu/ +.. _msv2-spec: https://casa.nrao.edu/Memos/229.html +.. _msv4-spec: https://docs.google.com/spreadsheets/d/14a6qMap9M5r_vjpLnaBKxsR9TF4azN5LVdOxLacOX-s/ +.. _xradio: https://github.com/casangi/xradio +.. _dask-ms: https://github.com/ratt-ru/dask-ms +.. _arcae: https://github.com/ratt-ru/arcae +.. _dask: https://www.dask.org/ +.. _python-casacore: https://github.com/casacore/python-casacore/ +.. _xarray: https://xarray.dev/ +.. _xarray_backend: https://docs.xarray.dev/en/stable/internals/how-to-add-new-backend.html +.. _xarray_lazy: https://docs.xarray.dev/en/latest/internals/internal-design.html#lazy-indexing-classes +.. 
_xarray_indexing_and_selecting: https://docs.xarray.dev/en/latest/user-guide/indexing.html +.. _xarray_chunked_arrays: https://docs.xarray.dev/en/latest/internals/chunked-arrays.html +.. _zarr: https://zarr.dev/ diff --git a/doc/source/tutorial.rst b/doc/source/tutorial.rst new file mode 100644 index 0000000..ef8e135 --- /dev/null +++ b/doc/source/tutorial.rst @@ -0,0 +1,97 @@ +Tutorial +======== + +The `Measurement Set v2.0 <msv2-spec_>`_ is a tabular format that +includes notions of regularity, or the shape of the data, in the MAIN table. +This is accomplished through the ``DATA_DESC_ID`` column which defines the +Spectral Window and Polarisation Configuration associated with each row: +the shape of the visibility in each row of the ``DATA`` column can +vary per-row. + +By contrast `Measurement Set v4.0 <msv4-spec_>`_ specifies a +collection of Datasets of ndarrays on a regular grid. +To move data between the two formats, it is necessary to partition +or group MSv2 rows by the same shape and configuration. + +In xarray-ms, this is accomplished by specifying ``partition_columns`` +when opening a Measurement Set. +Different columns may be used to define the partition, but +:code:`[DATA_DESC_ID, FIELD_ID, OBSERVATION_ID]` is a reasonable choice. + +Opening a Measurement Set +------------------------- + +As xarray-ms implements an `xarray backend <xarray_backend_>`_, +it is possible to use the standard :func:`xarray.open_dataset` +to open up a single partition of a Measurement Set. + +.. 
ipython:: python + :okwarning: + + import xarray_ms + from xarray_ms.testing.simulator import simulate + import xarray + + # Simulate a Measurement Set with 3 + # channel and polarisation configurations + ms = simulate("test.ms", data_description=[ + (8, ("XX", "XY", "YX", "YY")), + (4, ("RR", "LL")), + (16, ("RR", "RL", "LR", "LL"))]) + + ds = xarray.open_dataset(ms, + partition_columns=["DATA_DESC_ID", "FIELD_ID", "OBSERVATION_ID"]) + + ds + +Opening a specific partition +++++++++++++++++++++++++++++++ + +Because we've simulated multiple Data Description values in +our Measurement Set, xarray-ms has automatically opened the first partition +containing 8 frequencies and 4 linear polarisations. +To open the second partition a ``partition_key`` can also be +passed to :func:`xarray.open_dataset`. + +.. ipython:: python + + ds = xarray.open_dataset(ms, + partition_columns=["DATA_DESC_ID", "FIELD_ID", "OBSERVATION_ID"], + partition_key=(("DATA_DESC_ID", 1), ("FIELD_ID", 0), ("OBSERVATION_ID", 0))) + + ds + +and it can be seen that the dataset refers to the second partition +containing 4 frequencies and 2 circular polarisations. + +Selecting a subset of the data +++++++++++++++++++++++++++++++ + +By default, :func:`xarray.open_dataset` will return a dataset +with a lazy view over the data. +xarray has extensive functionality for +`indexing and selecting data <xarray_indexing_and_selecting_>`_. + +For example, one could select some specific dimensions out: + +.. ipython:: python + + ds = xarray.open_dataset(ms, + partition_columns=["DATA_DESC_ID", "FIELD_ID", "OBSERVATION_ID"], + partition_key=(("DATA_DESC_ID", 1), ("FIELD_ID", 0), ("OBSERVATION_ID", 0))) + + subds = ds.isel(time=slice(1, 3), baseline=[1, 3, 5], frequency=slice(2, 4)) + subds + +At this point, the dataset is still lazy -- no Data variables have been loaded +into memory. 
+ +Loading in a lazy dataset ++++++++++++++++++++++++++ + +By calling load on the lazy dataset, all the Data Variables are loaded onto the +dataset as numpy arrays. + +.. ipython:: python + + subds.load() diff --git a/tests/conftest.py b/tests/conftest.py index f4caca5..15b0421 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,9 +4,7 @@ from xarray_ms.backend.msv2.structure import MSv2StructureFactory from xarray_ms.backend.msv2.table_factory import TableFactory -from xarray_ms.testing.simulator import MSStructureSimulator - -DEFAULT_SIM_PARAMS = {"ntime": 5, "data_description": [(8, ["XX", "XY", "YX", "YY"])]} +from xarray_ms.testing.simulator import DEFAULT_SIM_PARAMS, MSStructureSimulator @pytest.fixture(autouse=True) diff --git a/xarray_ms/backend/msv2/array.py b/xarray_ms/backend/msv2/array.py index 8b7dbd2..c24181a 100644 --- a/xarray_ms/backend/msv2/array.py +++ b/xarray_ms/backend/msv2/array.py @@ -14,6 +14,11 @@ def slice_length(s, max_len): + if isinstance(s, np.ndarray): + if s.ndim != 1: + raise NotImplementedError("Slicing with non-1D numpy arrays") + return len(s) + start, stop, step = s.indices(max_len) if step != 1: raise NotImplementedError(f"Slicing with steps {s} other than 1 not supported") diff --git a/xarray_ms/testing/simulator.py b/xarray_ms/testing/simulator.py index e9faa70..f5f0ca4 100644 --- a/xarray_ms/testing/simulator.py +++ b/xarray_ms/testing/simulator.py @@ -1,4 +1,6 @@ import dataclasses +import os +import tempfile import typing from typing import ( Any, @@ -17,6 +19,8 @@ # First of February 2023 FIRST_FEB_2023_MJDS = 2459976.50000 * 86400 +# Default simulation parameters +DEFAULT_SIM_PARAMS = {"ntime": 5, "data_description": [(8, ["XX", "XY", "YX", "YY"])]} # Additional Columns to add ADDITIONAL_COLUMNS = { @@ -383,3 +387,16 @@ def data_factory( ) return {column: (dims, data) for column, dims, data in np_arrays} + + +def simulate(name=None, **sim_params) -> str: + """ + Create a Measurement Set in a temporary 
directory, + with the given simulation parameters. + Return the path to the simulated Measurement Set + """ + tmpdir = tempfile.mkdtemp() + ms_path = os.path.join(tmpdir, name or "simulated.ms") + simulator = MSStructureSimulator(**{**DEFAULT_SIM_PARAMS, **sim_params}) + simulator.simulate_ms(ms_path) + return ms_path