NOTE(review): line breaks and indentation below were reconstructed from a
whitespace-collapsed copy of this patch. Added/removed content is unchanged;
context-line whitespace is inferred -- confirm against the target tree, or
apply with `git apply --ignore-whitespace`.

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3b547f702..b3f5d187c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.9.1
+    rev: v0.9.2
     hooks:
       - id: ruff
         args: ["--fix"]
diff --git a/docs/release-notes/999.feature.md b/docs/release-notes/999.feature.md
new file mode 100644
index 000000000..a34550052
--- /dev/null
+++ b/docs/release-notes/999.feature.md
@@ -0,0 +1,2 @@
+{data}`None` values can now be serialized to `.h5ad` and `.zarr`,
+preserving e.g. {attr}`~anndata.AnnData.uns` structure through saving and loading {user}`flying-sheep`
diff --git a/src/anndata/_io/specs/methods.py b/src/anndata/_io/specs/methods.py
index 252597d47..2167ea740 100644
--- a/src/anndata/_io/specs/methods.py
+++ b/src/anndata/_io/specs/methods.py
@@ -327,6 +327,30 @@ def write_raw(
     _writer.write_elem(g, "varm", dict(raw.varm), dataset_kwargs=dataset_kwargs)
 
 
+########
+# Null #
+########
+
+
+@_REGISTRY.register_read(H5Array, IOSpec("null", "0.1.0"))
+@_REGISTRY.register_read(ZarrArray, IOSpec("null", "0.1.0"))
+def read_null(_elem, _reader) -> None:
+    return None
+
+
+@_REGISTRY.register_write(H5Group, type(None), IOSpec("null", "0.1.0"))
+def write_null_h5py(f, k, _v, _writer, dataset_kwargs=MappingProxyType({})):
+    f.create_dataset(k, data=h5py.Empty("f"), **dataset_kwargs)
+
+
+@_REGISTRY.register_write(ZarrGroup, type(None), IOSpec("null", "0.1.0"))
+def write_null_zarr(f, k, _v, _writer, dataset_kwargs=MappingProxyType({})):
+    import zarr
+
+    # zarr has no first-class null dataset
+    f.create_dataset(k, data=zarr.empty(()), **dataset_kwargs)
+
+
 ############
 # Mappings #
 ############
diff --git a/src/anndata/_io/specs/registry.py b/src/anndata/_io/specs/registry.py
index 13727fac3..2420d6af2 100644
--- a/src/anndata/_io/specs/registry.py
+++ b/src/anndata/_io/specs/registry.py
@@ -336,9 +336,6 @@ def write_elem(
 
         dest_type = type(store)
 
-        if elem is None:
-            return lambda *_, **__: None
-
         # Normalize k to absolute path
         if (
             (isinstance(store, ZarrGroup) and is_zarr_v2())
diff --git a/tests/test_io_elementwise.py b/tests/test_io_elementwise.py
index 0e419293a..5daa0790d 100644
--- a/tests/test_io_elementwise.py
+++ b/tests/test_io_elementwise.py
@@ -126,6 +126,7 @@ def create_sparse_store(
 @pytest.mark.parametrize(
     ("value", "encoding_type"),
     [
+        pytest.param(None, "null", id="none"),
         pytest.param("hello world", "string", id="py_str"),
         pytest.param(np.str_("hello world"), "string", id="np_str"),
         pytest.param(np.array([1, 2, 3]), "array", id="np_arr_int"),
diff --git a/tests/test_readwrite.py b/tests/test_readwrite.py
index dfeda5021..3b683cc40 100644
--- a/tests/test_readwrite.py
+++ b/tests/test_readwrite.py
@@ -837,6 +837,28 @@ def test_adata_in_uns(tmp_path, diskfmt, roundtrip):
     assert_equal(orig, curr)
 
 
+@pytest.mark.parametrize(
+    "uns_val",
+    [
+        pytest.param(dict(base=None), id="dict_val"),
+        pytest.param(
+            pd.DataFrame(dict(col_0=["string", None])).convert_dtypes(), id="df"
+        ),
+    ],
+)
+def test_none_dict_value_in_uns(diskfmt, tmp_path, roundtrip, uns_val):
+    pth = tmp_path / f"adata_dtype.{diskfmt}"
+
+    orig = ad.AnnData(np.ones((3, 4)), uns=dict(val=uns_val))
+    with ad.settings.override(allow_write_nullable_strings=True):
+        curr = roundtrip(orig, pth)
+
+    if isinstance(orig.uns["val"], pd.DataFrame):
+        pd.testing.assert_frame_equal(curr.uns["val"], orig.uns["val"])
+    else:
+        assert curr.uns["val"] == orig.uns["val"]
+
+
 def test_io_dtype(tmp_path, diskfmt, dtype, roundtrip):
     pth = tmp_path / f"adata_dtype.{diskfmt}"
 