Skip to content

Commit

Permalink
add GenericReadFile
Browse files Browse the repository at this point in the history
  • Loading branch information
leoschwarz committed Jul 5, 2024
1 parent 6a000e7 commit 023b5e6
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 14 deletions.
14 changes: 3 additions & 11 deletions src/depiction/persistence/imzml/imzml_read_file.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from __future__ import annotations
import shutil
from collections import defaultdict
from collections.abc import Generator
Expand All @@ -13,9 +14,10 @@
from depiction.persistence.imzml.imzml_mode_enum import ImzmlModeEnum
from depiction.persistence.imzml.imzml_reader import ImzmlReader
from depiction.persistence.pixel_size import PixelSize
from depiction.persistence.types import GenericReadFile


class ImzmlReadFile:
class ImzmlReadFile(GenericReadFile):
"""Represents a .imzML file and its accompanying .ibd file.
It provides several methods and properties to obtain general information about this file and verify its integrity.
However, to load the actual spectra, use the `reader` context manager or `get_reader` method to obtain a
Expand Down Expand Up @@ -68,13 +70,6 @@ def coordinates(self) -> NDArray[int]:
# TODO check if it isn't simply always 3d because of pyimzml
return self._cached_properties["coordinates"]

@property
def coordinates_2d(self) -> NDArray[int]:
    """Return only the first two coordinate columns of every spectrum.

    The result is a view-style slice of `self.coordinates` with shape
    (n_spectra, 2); the two columns are taken to be x and y.
    """
    # TODO double check convention and update docstring accordingly
    xy_columns = slice(None, 2)
    return self.coordinates[:, xy_columns]

@property
def compact_metadata(self) -> dict[str, int | str | list[float]]:
"""Returns a compact representation of general metadata about the .imzML file, useful when comparing a large
Expand Down Expand Up @@ -165,9 +160,6 @@ def summary(self, checksums: bool = True) -> str:
f"{mz_range_line}"
)

def print_summary(self, checksums: bool = True, file: TextIO | None = None) -> None:
    """Print the text produced by `summary` to `file`.

    `checksums` is forwarded unchanged to `summary`; `file` is forwarded
    unchanged to `print`, so `None` means the default output stream.
    """
    summary_text = self.summary(checksums=checksums)
    print(summary_text, file=file)

@cached_property
def pixel_size(self) -> PixelSize | None:
# TODO optimize and improve, error handling when missing
Expand Down
80 changes: 77 additions & 3 deletions src/depiction/persistence/types.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,25 @@
from __future__ import annotations

from functools import cached_property
from typing import TYPE_CHECKING, Self, Protocol
import numpy as np

if TYPE_CHECKING:
from types import TracebackType
from numpy.typing import NDArray
from collections.abc import Sequence
import numpy as np

from tqdm import tqdm

from collections.abc import Generator
from contextlib import contextmanager
from functools import cached_property
from pathlib import Path
from typing import Optional, TextIO

from tqdm import tqdm
from numpy.typing import NDArray

from depiction.persistence.imzml.imzml_mode_enum import ImzmlModeEnum
from depiction.persistence.pixel_size import PixelSize


# TODO better name
Expand Down Expand Up @@ -106,6 +112,70 @@ def get_spectra_mz_range(self, i_spectra: list[int] | None) -> tuple[float, floa
return mz_min, mz_max


class GenericReadFile(Protocol):
    """Interface of a readable mass spectrometry imaging file.

    Most members are stubs that an implementation has to provide. Two members,
    `coordinates_2d` and `print_summary`, come with a default implementation
    that is only inherited by classes which subclass this protocol explicitly.
    """

    @contextmanager
    def reader(self) -> Generator[GenericReader, None, None]:
        """Returns a context manager that yields a `GenericReader` instance."""
        ...

    def get_reader(self) -> GenericReader:
        """Returns a `GenericReader` instance for this file."""
        ...

    @property
    def n_spectra(self) -> int:
        """Returns the number of spectra in the file."""
        ...

    @property
    def imzml_mode(self) -> ImzmlModeEnum:
        """Returns the mode of the .imzML file (continuous or processed)."""
        ...

    @property
    def coordinates(self) -> NDArray[int]:
        """Returns the spatial coordinates of the spectra in the file.
        Shape: (n_spectra, n_dimensions) where n_dimensions is 2 or 3 depending on the file."""
        ...

    @property
    def coordinates_2d(self) -> NDArray[int]:
        """Returns the spatial coordinates of the spectra in the file.
        Shape: (n_spectra, 2) where the first two columns are the x and y coordinates."""
        # TODO double check convention and update docstring accordingly
        # Default implementation: keep only the first two columns of `coordinates`.
        return self.coordinates[:, :2]

    @property
    def compact_metadata(self) -> dict[str, int | str | list[float]]:
        """Returns a compact representation of general metadata about the file, useful when comparing a large
        number of files."""
        # TODO should this really be here
        ...

    def is_checksum_valid(self) -> bool | None:
        """Returns True if the checksum of the .ibd file matches the expected value. False otherwise.
        This operation can be slow for large files, but will be cached after the first call.
        `None` is returned when checksum information is not available.
        """
        ...

    def summary(self, checksums: bool = True) -> str:
        """Returns a summary of the file."""
        ...

    def print_summary(self, checksums: bool = True, file: TextIO | None = None) -> None:
        """Prints a summary of the file.

        `checksums` is forwarded to `summary`; `file` is forwarded to `print`.
        """
        print(self.summary(checksums=checksums), file=file)

    @property
    def pixel_size(self) -> PixelSize | None:
        """Returns pixel size information, if available."""
        ...

    # TODO consider including in the generic interface
    # def copy_to(self, path: Path) -> None:
    #     """Copies the file of this instance to the given path. Needs to end with .imzML."""
    #     shutil.copy(self.imzml_file, path)
    #     shutil.copy(self.ibd_file, path.with_suffix(".ibd"))


class GenericWriter(Protocol):

# TODO this currently does not impl __enter__ and __exit__ as GenericReader
Expand Down Expand Up @@ -152,3 +222,7 @@ def progress_fn(x: Sequence[int]) -> Sequence[int]:
for spectrum_index in progress_fn(spectra_indices):
mz_arr, int_arr, coordinates = reader.get_spectrum_with_coords(spectrum_index)
self.add_spectrum(mz_arr, int_arr, coordinates)


class GenericWriteFile(Protocol):
    """Placeholder protocol that currently declares no members.

    NOTE(review): presumably intended as the writable counterpart of
    `GenericReadFile` -- confirm before relying on it.
    """
21 changes: 21 additions & 0 deletions tests/unit/persistence/imzml/test_imzml_read_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from pytest_mock import MockerFixture

from depiction.persistence import ImzmlReadFile, ImzmlModeEnum
from depiction.persistence.pixel_size import PixelSize


@pytest.fixture()
Expand Down Expand Up @@ -265,6 +266,26 @@ def test_print_summary(mocker: MockerFixture, mock_read_file: ImzmlReadFile) ->
mock_print.assert_called_once_with(mock_summary.return_value, file=None)


def test_pixel_size_when_present(mocker: MockerFixture, mock_read_file: ImzmlReadFile) -> None:
    """`pixel_size` equals `PixelSize(10, 20, "micrometer")` when the scan settings carry both sizes."""
    mock_parser_constructor = mocker.patch("pyimzml.ImzMLParser.ImzMLParser")
    mock_parser = mocker.MagicMock(name="mock_parser")
    # The parser mock is exposed as the value bound by `with ImzMLParser(...) as ...`;
    # presumably the code under test enters the parser as a context manager.
    mock_parser_constructor.return_value.__enter__.return_value = mock_parser
    # NOTE(review): the x and y keys are spelled differently ("pixel size (x)" vs "pixel size y");
    # presumably this mirrors the keys pyimzml reports -- confirm before "normalizing" them.
    mock_parser.metadata.pretty.return_value = {"scan_settings": {1: {"pixel size (x)": 10, "pixel size y": 20}}}
    assert mock_read_file.pixel_size == PixelSize(10, 20, "micrometer")
    # The metadata is fetched exactly once and the parser is built from the file's path.
    mock_parser.metadata.pretty.assert_called_once_with()
    mock_parser_constructor.assert_called_once_with(mock_read_file._path)


def test_pixel_size_when_none(mocker: MockerFixture, mock_read_file: ImzmlReadFile) -> None:
    """`pixel_size` is `None` when the scan settings entry contains no pixel size keys."""
    mock_parser_constructor = mocker.patch("pyimzml.ImzMLParser.ImzMLParser")
    mock_parser = mocker.MagicMock(name="mock_parser")
    # The parser mock is exposed as the value bound by `with ImzMLParser(...) as ...`;
    # presumably the code under test enters the parser as a context manager.
    mock_parser_constructor.return_value.__enter__.return_value = mock_parser
    # An empty settings dict stands in for a file without pixel size information.
    mock_parser.metadata.pretty.return_value = {"scan_settings": {1: {}}}
    assert mock_read_file.pixel_size is None
    # The metadata is fetched exactly once and the parser is built from the file's path.
    mock_parser.metadata.pretty.assert_called_once_with()
    mock_parser_constructor.assert_called_once_with(mock_read_file._path)


def test_copy_to(mocker: MockerFixture, mock_read_file: ImzmlReadFile) -> None:
mock_output_file = mocker.MagicMock(name="mock_output_file")
mock_copy = mocker.patch("shutil.copy")
Expand Down

0 comments on commit 023b5e6

Please sign in to comment.