Merge branch 'main' into ig/zarr_v3
ilan-gold authored Jan 24, 2025
2 parents ae18de3 + 3d0105b commit a7d3bf7
Showing 6 changed files with 50 additions and 4 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.9.1
+    rev: v0.9.2
     hooks:
       - id: ruff
         args: ["--fix"]
2 changes: 2 additions & 0 deletions docs/release-notes/999.feature.md
@@ -0,0 +1,2 @@
+{data}`None` values can now be serialized to `.h5ad` and `.zarr`,
+preserving e.g. {attr}`~anndata.AnnData.uns` structure through saving and loading {user}`flying-sheep`
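
A minimal usage sketch of the behaviour this note describes (not part of the diff; the file name and the `maybe_missing` key are illustrative):

import anndata as ad
import numpy as np

# None values in .uns now survive a save/load cycle instead of being dropped
adata = ad.AnnData(np.ones((3, 4)), uns=dict(maybe_missing=None))
adata.write_h5ad("example.h5ad")
assert ad.read_h5ad("example.h5ad").uns["maybe_missing"] is None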
24 changes: 24 additions & 0 deletions src/anndata/_io/specs/methods.py
@@ -327,6 +327,30 @@ def write_raw(
     _writer.write_elem(g, "varm", dict(raw.varm), dataset_kwargs=dataset_kwargs)


+########
+# Null #
+########
+
+
+@_REGISTRY.register_read(H5Array, IOSpec("null", "0.1.0"))
+@_REGISTRY.register_read(ZarrArray, IOSpec("null", "0.1.0"))
+def read_null(_elem, _reader) -> None:
+    return None
+
+
+@_REGISTRY.register_write(H5Group, type(None), IOSpec("null", "0.1.0"))
+def write_null_h5py(f, k, _v, _writer, dataset_kwargs=MappingProxyType({})):
+    f.create_dataset(k, data=h5py.Empty("f"), **dataset_kwargs)
+
+
+@_REGISTRY.register_write(ZarrGroup, type(None), IOSpec("null", "0.1.0"))
+def write_null_zarr(f, k, _v, _writer, dataset_kwargs=MappingProxyType({})):
+    import zarr
+
+    # zarr has no first-class null dataset
+    f.create_dataset(k, data=zarr.empty(()), **dataset_kwargs)
+
+
 ############
 # Mappings #
 ############
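
The registrations above let the element-IO layer round-trip `None` directly. A minimal sketch, assuming `anndata.io.write_elem`/`read_elem` (exposed under `anndata.experimental` in older releases) and an illustrative store path:

import zarr
from anndata.io import read_elem, write_elem

# write_null_zarr stores an empty 0-d dataset tagged with encoding-type "null";
# read_null maps it back to None on load
g = zarr.open_group("null_example.zarr", mode="w")
write_elem(g, "nothing", None)
assert read_elem(g["nothing"]) is None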
3 changes: 0 additions & 3 deletions src/anndata/_io/specs/registry.py
@@ -336,9 +336,6 @@ def write_elem(

         dest_type = type(store)

-        if elem is None:
-            return lambda *_, **__: None
-
         # Normalize k to absolute path
         if (
             (isinstance(store, ZarrGroup) and is_zarr_v2())
1 change: 1 addition & 0 deletions tests/test_io_elementwise.py
@@ -126,6 +126,7 @@ def create_sparse_store(
 @pytest.mark.parametrize(
     ("value", "encoding_type"),
     [
+        pytest.param(None, "null", id="none"),
         pytest.param("hello world", "string", id="py_str"),
         pytest.param(np.str_("hello world"), "string", id="np_str"),
         pytest.param(np.array([1, 2, 3]), "array", id="np_arr_int"),
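
The new `none` case checks that `None` is written with encoding type `"null"`. A sketch of inspecting that metadata directly on an HDF5 store (assuming `anndata.io.write_elem`; attribute names follow anndata's on-disk encoding spec):

import h5py
from anndata.io import write_elem

with h5py.File("null_check.h5", "w") as f:
    write_elem(f, "nothing", None)
    # the registry records the IOSpec on the written dataset
    assert f["nothing"].attrs["encoding-type"] == "null"
    assert f["nothing"].attrs["encoding-version"] == "0.1.0"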
22 changes: 22 additions & 0 deletions tests/test_readwrite.py
@@ -837,6 +837,28 @@ def test_adata_in_uns(tmp_path, diskfmt, roundtrip):
     assert_equal(orig, curr)


+@pytest.mark.parametrize(
+    "uns_val",
+    [
+        pytest.param(dict(base=None), id="dict_val"),
+        pytest.param(
+            pd.DataFrame(dict(col_0=["string", None])).convert_dtypes(), id="df"
+        ),
+    ],
+)
+def test_none_dict_value_in_uns(diskfmt, tmp_path, roundtrip, uns_val):
+    pth = tmp_path / f"adata_dtype.{diskfmt}"
+
+    orig = ad.AnnData(np.ones((3, 4)), uns=dict(val=uns_val))
+    with ad.settings.override(allow_write_nullable_strings=True):
+        curr = roundtrip(orig, pth)
+
+    if isinstance(orig.uns["val"], pd.DataFrame):
+        pd.testing.assert_frame_equal(curr.uns["val"], orig.uns["val"])
+    else:
+        assert curr.uns["val"] == orig.uns["val"]
+
+
 def test_io_dtype(tmp_path, diskfmt, dtype, roundtrip):
     pth = tmp_path / f"adata_dtype.{diskfmt}"

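
The DataFrame case in the test above relies on pandas' nullable string dtype, which anndata only writes when explicitly opted in. A sketch mirroring that setup (path and key names illustrative):

import anndata as ad
import numpy as np
import pandas as pd

orig = ad.AnnData(
    np.ones((3, 4)),
    uns=dict(val=pd.DataFrame(dict(col_0=["string", None])).convert_dtypes()),
)
# nullable string columns require opting in before writing
with ad.settings.override(allow_write_nullable_strings=True):
    orig.write_zarr("none_in_df.zarr")
curr = ad.read_zarr("none_in_df.zarr")
pd.testing.assert_frame_equal(curr.uns["val"], orig.uns["val"])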
