Skip to content

Commit

Permalink
Merge branch 'main' into feat/batch-creation
Browse files Browse the repository at this point in the history
  • Loading branch information
d-v-b authored Feb 7, 2025
2 parents 24eab3a + a52048d commit 2b02996
Show file tree
Hide file tree
Showing 8 changed files with 64 additions and 6 deletions.
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ ci:
default_stages: [pre-commit, pre-push]
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.9.1
rev: v0.9.4
hooks:
- id: ruff
args: ["--fix", "--show-fixes"]
- id: ruff-format
- repo: https://github.com/codespell-project/codespell
rev: v2.3.0
rev: v2.4.1
hooks:
- id: codespell
args: ["-L", "fo,ihs,kake,te", "-S", "fixture"]
Expand All @@ -37,7 +37,7 @@ repos:
# Tests
- pytest
- repo: https://github.com/scientific-python/cookie
rev: 2024.08.19
rev: 2025.01.22
hooks:
- id: sp-repo-review
- repo: https://github.com/pre-commit/pygrep-hooks
Expand All @@ -50,6 +50,6 @@ repos:
hooks:
- id: numpydoc-validation
- repo: https://github.com/twisted/towncrier
rev: 23.11.0
rev: 24.8.0
hooks:
- id: towncrier-check
1 change: 1 addition & 0 deletions changes/2778.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Use ``removeprefix`` rather than ``replace`` when removing filename prefixes in ``FsspecStore.list``.
1 change: 1 addition & 0 deletions changes/2796.chore.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The docs environment is now built with ``astroid`` pinned to a version less than 4. This allows the docs to build in CI.
1 change: 1 addition & 0 deletions changes/2801.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Ensure UTF-8-compliant strings are used to construct NumPy arrays in property-based tests.
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ docs = [
'numcodecs[msgpack]',
'rich',
's3fs',
'astroid<4'
]


Expand Down Expand Up @@ -427,3 +428,6 @@ directory = 'changes'
filename = "docs/release-notes.rst"
underlines = ["-", "~", "^"]
issue_format = ":issue:`{issue}`"

[tool.codespell]
ignore-words-list = "astroid"
2 changes: 1 addition & 1 deletion src/zarr/storage/_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ async def set_partial_values(
async def list(self) -> AsyncIterator[str]:
# docstring inherited
# Yields every name returned by the filesystem's `_find` under `self.path`,
# with the leading "<self.path>/" removed from each name.
# Presumably `_find(..., detail=False)` returns plain path strings, since
# string methods are called on each item below -- TODO confirm against fsspec.
allfiles = await self.fs._find(self.path, detail=False, withdirs=False)
# NOTE(review): the next two `for` lines are the removed and the added side of
# this diff hunk (1 addition & 1 deletion); only one of them exists in the
# file at any time.
# `str.replace` deletes *every* occurrence of "<self.path>/" inside a name
# (with an empty path that is every "/"), whereas `str.removeprefix` only
# strips it when it appears at the very start of the name.
for onefile in (a.replace(self.path + "/", "") for a in allfiles):
for onefile in (a.removeprefix(self.path + "/") for a in allfiles):
yield onefile

async def list_dir(self, prefix: str) -> AsyncIterator[str]:
Expand Down
31 changes: 31 additions & 0 deletions src/zarr/testing/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,37 @@ async def test_list_prefix(self, store: S) -> None:
expected = tuple(sorted(expected))
assert observed == expected

async def test_list_empty_path(self, store: S) -> None:
    """
    Check that ``list`` and ``list_prefix`` hand back stored keys unmodified.

    Three nested keys are written to the store, then the keys reported by
    ``list()``, ``list_prefix("")`` and ``list_prefix("foo/bar/")`` are compared
    (order-insensitively) with the keys that were written.

    The scenario this is meant to guard is a store whose path is an empty
    string, where no unwanted replacement inside the key names may occur.
    Whether the path is actually empty depends on the ``store`` fixture
    supplied by the concrete test class.
    """
    empty_buffer = self.buffer_cls.from_bytes(b"")
    written = dict.fromkeys(
        ("foo/bar/zarr.json", "foo/bar/c/1", "foo/baz/c/0"),
        empty_buffer,
    )
    await store._set_many(written.items())

    # Every comparison below is made on sorted lists, so the order in which
    # the store yields keys does not matter.
    all_keys = sorted(written)

    # list() must report every key that was written.
    listed = await _collect_aiterator(store.list())
    assert sorted(listed) == all_keys

    # list_prefix() with an empty prefix must report the same full set.
    listed_empty_prefix = await _collect_aiterator(store.list_prefix(""))
    assert sorted(listed_empty_prefix) == all_keys

    # list_prefix() with a non-empty prefix must report only the matching keys.
    listed_under_bar = await _collect_aiterator(store.list_prefix("foo/bar/"))
    keys_under_bar = [key for key in all_keys if key.startswith("foo/bar/")]
    assert sorted(listed_under_bar) == keys_under_bar

async def test_list_dir(self, store: S) -> None:
root = "foo"
store_dict = {
Expand Down
22 changes: 21 additions & 1 deletion src/zarr/testing/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,21 @@ def v2_dtypes() -> st.SearchStrategy[np.dtype]:
)


def safe_unicode_for_dtype(dtype: np.dtype[np.str_]) -> st.SearchStrategy[str]:
    """
    Build a text strategy whose strings fit into the given unicode dtype.

    Parameters
    ----------
    dtype : np.dtype[np.str_]
        The string dtype the generated text must fit in; only its
        ``itemsize`` is consulted.

    Returns
    -------
    st.SearchStrategy[str]
        Strategy producing non-empty strings of at most ``itemsize // 4``
        characters (never fewer than one), drawn from code points >= 32 and
        excluding the surrogate category ``Cs``.
    """
    # Characters are accounted for as UTF-32, i.e. 4 bytes each, so the byte
    # width divided by four is the character capacity. Clamp to 1 so that
    # max_size is never smaller than min_size.
    char_capacity = max(1, dtype.itemsize // 4)

    # Surrogates are *technically allowed* in Python strings but are avoided
    # here; code points below 32 are excluded as well.
    allowed_chars = st.characters(
        min_codepoint=32,
        blacklist_categories=["Cs"],
    )

    return st.text(alphabet=allowed_chars, min_size=1, max_size=char_capacity)


# From https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#node-names
# 1. must not be the empty string ("")
# 2. must not include the character "/"
Expand Down Expand Up @@ -86,7 +101,12 @@ def numpy_arrays(
Generate numpy arrays that can be saved in the provided Zarr format.
"""
zarr_format = draw(zarr_formats)
return draw(npst.arrays(dtype=v3_dtypes() if zarr_format == 3 else v2_dtypes(), shape=shapes))
dtype = draw(v3_dtypes() if zarr_format == 3 else v2_dtypes())
if np.issubdtype(dtype, np.str_):
safe_unicode_strings = safe_unicode_for_dtype(dtype)
return draw(npst.arrays(dtype=dtype, shape=shapes, elements=safe_unicode_strings))

return draw(npst.arrays(dtype=dtype, shape=shapes))


@st.composite # type: ignore[misc]
Expand Down

0 comments on commit 2b02996

Please sign in to comment.