Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

create_array creates explicit groups #2795

Merged
merged 17 commits into from
Feb 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/2795.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Alters the behavior of ``create_array`` to ensure that any groups implied by the array's name are created if they do not already exist. Also simplifies the type signature for any function that takes an ArrayConfig-like object.
6 changes: 3 additions & 3 deletions src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from typing_extensions import deprecated

from zarr.core.array import Array, AsyncArray, create_array, get_array_metadata
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArrayConfigParams
from zarr.core.buffer import NDArrayLike
from zarr.core.common import (
JSON,
Expand Down Expand Up @@ -856,7 +856,7 @@ async def create(
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
dimension_names: Iterable[str] | None = None,
storage_options: dict[str, Any] | None = None,
config: ArrayConfig | ArrayConfigLike | None = None,
config: ArrayConfigLike | None = None,
**kwargs: Any,
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
"""Create an array.
Expand Down Expand Up @@ -1018,7 +1018,7 @@ async def create(
mode = "a"
store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)

config_dict: ArrayConfigLike = {}
config_dict: ArrayConfigParams = {}

if write_empty_chunks is not None:
if config is not None:
Expand Down
10 changes: 5 additions & 5 deletions src/zarr/api/synchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
SerializerLike,
ShardsLike,
)
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike
from zarr.core.array_spec import ArrayConfigLike
from zarr.core.buffer import NDArrayLike
from zarr.core.chunk_key_encodings import ChunkKeyEncoding, ChunkKeyEncodingLike
from zarr.core.common import (
Expand Down Expand Up @@ -625,7 +625,7 @@ def create(
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
dimension_names: Iterable[str] | None = None,
storage_options: dict[str, Any] | None = None,
config: ArrayConfig | ArrayConfigLike | None = None,
config: ArrayConfigLike | None = None,
**kwargs: Any,
) -> Array:
"""Create an array.
Expand Down Expand Up @@ -695,7 +695,7 @@ def create(
storage_options : dict
If using an fsspec URL to create the store, these will be passed to
the backend implementation. Ignored otherwise.
config : ArrayConfig or ArrayConfigLike, optional
config : ArrayConfigLike, optional
Runtime configuration of the array. If provided, will override the
default values from `zarr.config.array`.

Expand Down Expand Up @@ -761,7 +761,7 @@ def create_array(
dimension_names: Iterable[str] | None = None,
storage_options: dict[str, Any] | None = None,
overwrite: bool = False,
config: ArrayConfig | ArrayConfigLike | None = None,
config: ArrayConfigLike | None = None,
) -> Array:
"""Create an array.

Expand Down Expand Up @@ -853,7 +853,7 @@ def create_array(
Ignored otherwise.
overwrite : bool, default False
Whether to overwrite an array with the same name in the store, if one exists.
config : ArrayConfig or ArrayConfigLike, optional
config : ArrayConfigLike, optional
Runtime configuration for the array.

Returns
Expand Down
60 changes: 28 additions & 32 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ class AsyncArray(Generic[T_ArrayMetadata]):
The metadata of the array.
store_path : StorePath
The path to the Zarr store.
config : ArrayConfig, optional
config : ArrayConfigLike, optional
The runtime configuration of the array, by default None.

Attributes
Expand All @@ -246,22 +246,22 @@ def __init__(
self: AsyncArray[ArrayV2Metadata],
metadata: ArrayV2Metadata | ArrayV2MetadataDict,
store_path: StorePath,
config: ArrayConfig | None = None,
config: ArrayConfigLike | None = None,
) -> None: ...

@overload
def __init__(
self: AsyncArray[ArrayV3Metadata],
metadata: ArrayV3Metadata | ArrayV3MetadataDict,
store_path: StorePath,
config: ArrayConfig | None = None,
config: ArrayConfigLike | None = None,
) -> None: ...

def __init__(
self,
metadata: ArrayMetadata | ArrayMetadataDict,
store_path: StorePath,
config: ArrayConfig | None = None,
config: ArrayConfigLike | None = None,
) -> None:
if isinstance(metadata, dict):
zarr_format = metadata["zarr_format"]
Expand All @@ -275,12 +275,11 @@ def __init__(
raise ValueError(f"Invalid zarr_format: {zarr_format}. Expected 2 or 3")

metadata_parsed = parse_array_metadata(metadata)

config = ArrayConfig.from_dict({}) if config is None else config
config_parsed = parse_array_config(config)

object.__setattr__(self, "metadata", metadata_parsed)
object.__setattr__(self, "store_path", store_path)
object.__setattr__(self, "_config", config)
object.__setattr__(self, "_config", config_parsed)
object.__setattr__(self, "codec_pipeline", create_codec_pipeline(metadata=metadata_parsed))

# this overload defines the function signature when zarr_format is 2
Expand All @@ -304,7 +303,7 @@ async def create(
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
config: ArrayConfig | ArrayConfigLike | None = None,
config: ArrayConfigLike | None = None,
) -> AsyncArray[ArrayV2Metadata]: ...

# this overload defines the function signature when zarr_format is 3
Expand Down Expand Up @@ -333,7 +332,7 @@ async def create(
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
config: ArrayConfig | ArrayConfigLike | None = None,
config: ArrayConfigLike | None = None,
) -> AsyncArray[ArrayV3Metadata]: ...

@overload
Expand Down Expand Up @@ -361,7 +360,7 @@ async def create(
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
config: ArrayConfig | ArrayConfigLike | None = None,
config: ArrayConfigLike | None = None,
) -> AsyncArray[ArrayV3Metadata]: ...

@overload
Expand Down Expand Up @@ -395,7 +394,7 @@ async def create(
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
config: ArrayConfig | ArrayConfigLike | None = None,
config: ArrayConfigLike | None = None,
) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]: ...

@classmethod
Expand Down Expand Up @@ -430,7 +429,7 @@ async def create(
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
config: ArrayConfig | ArrayConfigLike | None = None,
config: ArrayConfigLike | None = None,
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
"""Method to create a new asynchronous array instance.

Expand Down Expand Up @@ -508,7 +507,7 @@ async def create(
Whether to raise an error if the store already exists (default is False).
data : npt.ArrayLike, optional
The data to be inserted into the array (default is None).
config : ArrayConfig or ArrayConfigLike, optional
config : ArrayConfigLike, optional
Runtime configuration for the array.

Returns
Expand Down Expand Up @@ -571,7 +570,7 @@ async def _create(
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
config: ArrayConfig | ArrayConfigLike | None = None,
config: ArrayConfigLike | None = None,
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
"""Method to create a new asynchronous array instance.
See :func:`AsyncArray.create` for more details.
Expand Down Expand Up @@ -1745,7 +1744,7 @@ def create(
compressor: dict[str, JSON] | None = None,
# runtime
overwrite: bool = False,
config: ArrayConfig | ArrayConfigLike | None = None,
config: ArrayConfigLike | None = None,
) -> Array:
"""Creates a new Array instance from an initialized store.

Expand Down Expand Up @@ -1874,7 +1873,7 @@ def _create(
compressor: dict[str, JSON] | None = None,
# runtime
overwrite: bool = False,
config: ArrayConfig | ArrayConfigLike | None = None,
config: ArrayConfigLike | None = None,
) -> Array:
"""Creates a new Array instance from an initialized store.
See :func:`Array.create` for more details.
Expand Down Expand Up @@ -3814,7 +3813,8 @@ async def init_array(
chunk_key_encoding: ChunkKeyEncodingLike | None = None,
dimension_names: Iterable[str] | None = None,
overwrite: bool = False,
) -> ArrayV3Metadata | ArrayV2Metadata:
config: ArrayConfigLike | None,
) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]:
"""Create and persist an array metadata document.

Parameters
Expand Down Expand Up @@ -3893,11 +3893,13 @@ async def init_array(
Zarr format 3 only. Zarr format 2 arrays should not use this parameter.
overwrite : bool, default False
Whether to overwrite an array with the same name in the store, if one exists.
config : ArrayConfigLike or None, optional
Configuration for this array.

Returns
-------
ArrayV3Metadata | ArrayV2Metadata
The array metadata document.
AsyncArray
The AsyncArray.
"""

if zarr_format is None:
Expand Down Expand Up @@ -3997,14 +3999,9 @@ async def init_array(
attributes=attributes,
)

# save the metadata to disk
# TODO: make this easier -- it should be a simple function call that takes a {key: buffer}
coros = (
(store_path / key).set(value)
for key, value in meta.to_buffer_dict(default_buffer_prototype()).items()
)
await gather(*coros)
return meta
arr = AsyncArray(metadata=meta, store_path=store_path, config=config)
await arr._save_metadata(meta, ensure_parents=True)
return arr


async def create_array(
Expand All @@ -4027,7 +4024,7 @@ async def create_array(
dimension_names: Iterable[str] | None = None,
storage_options: dict[str, Any] | None = None,
overwrite: bool = False,
config: ArrayConfig | ArrayConfigLike | None = None,
config: ArrayConfigLike | None = None,
write_data: bool = True,
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
"""Create an array.
Expand Down Expand Up @@ -4117,7 +4114,7 @@ async def create_array(
Ignored otherwise.
overwrite : bool, default False
Whether to overwrite an array with the same name in the store, if one exists.
config : ArrayConfig or ArrayConfigLike, optional
config : ArrayConfigLike, optional
Runtime configuration for the array.
write_data : bool
If a pre-existing array-like object was provided to this function via the ``data`` parameter
Expand All @@ -4143,13 +4140,12 @@ async def create_array(
<AsyncArray memory://140349042942400 shape=(100, 100) dtype=int32>
"""
mode: Literal["a"] = "a"
config_parsed = parse_array_config(config)
store_path = await make_store_path(store, path=name, mode=mode, storage_options=storage_options)

data_parsed, shape_parsed, dtype_parsed = _parse_data_params(
data=data, shape=shape, dtype=dtype
)
meta = await init_array(
result = await init_array(
store_path=store_path,
shape=shape_parsed,
dtype=dtype_parsed,
Expand All @@ -4165,9 +4161,9 @@ async def create_array(
chunk_key_encoding=chunk_key_encoding,
dimension_names=dimension_names,
overwrite=overwrite,
config=config,
)

result = AsyncArray(metadata=meta, store_path=store_path, config=config_parsed)
if write_data is True and data_parsed is not None:
await result._set_selection(
BasicIndexer(..., shape=result.shape, chunk_grid=result.metadata.chunk_grid),
Expand Down
11 changes: 7 additions & 4 deletions src/zarr/core/array_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from zarr.core.common import ChunkCoords


class ArrayConfigLike(TypedDict):
class ArrayConfigParams(TypedDict):
"""
A TypedDict model of the attributes of an ArrayConfig class, but with no required fields.
This allows for partial construction of an ArrayConfig, with the assumption that the unset
Expand Down Expand Up @@ -56,13 +56,13 @@ def __init__(self, order: MemoryOrder, write_empty_chunks: bool) -> None:
object.__setattr__(self, "write_empty_chunks", write_empty_chunks_parsed)

@classmethod
def from_dict(cls, data: ArrayConfigLike) -> Self:
def from_dict(cls, data: ArrayConfigParams) -> Self:
"""
Create an ArrayConfig from a dict. The keys of that dict are a subset of the
attributes of the ArrayConfig class. Any keys missing from that dict will be set to the
values in the ``array`` namespace of ``zarr.config``.
"""
kwargs_out: ArrayConfigLike = {}
kwargs_out: ArrayConfigParams = {}
for f in fields(ArrayConfig):
field_name = cast(Literal["order", "write_empty_chunks"], f.name)
if field_name not in data:
Expand All @@ -72,7 +72,10 @@ def from_dict(cls, data: ArrayConfigLike) -> Self:
return cls(**kwargs_out)


def parse_array_config(data: ArrayConfig | ArrayConfigLike | None) -> ArrayConfig:
ArrayConfigLike = ArrayConfig | ArrayConfigParams


def parse_array_config(data: ArrayConfigLike | None) -> ArrayConfig:
"""
Convert various types of data to an ArrayConfig.
"""
Expand Down
Loading