diff --git a/src/depiction/persistence/imzml/extract_metadata.py b/src/depiction/persistence/imzml/extract_metadata.py deleted file mode 100644 index 28b5627..0000000 --- a/src/depiction/persistence/imzml/extract_metadata.py +++ /dev/null @@ -1,54 +0,0 @@ -import warnings -from functools import cached_property -from pathlib import Path -from xml.etree.ElementTree import ElementTree - -import lxml.etree -from pydantic import BaseModel - -from depiction.persistence.imzml.parser.parse_metadata import ParseMetadata -from depiction.persistence.pixel_size import PixelSize - - -class Metadata(BaseModel): - pixel_size: PixelSize - data_processing: list[str] - software: list[str] - - -class ExtractMetadata: - """Handles the extraction of some metadata that is used for the data processing.""" - - def __init__(self, imzml_path: Path) -> None: - self._imzml_path = Path(imzml_path) - self._ns = "{http://psi.hupo.org/ms/mzml}" - - @classmethod - def extract_file(cls, path: Path) -> Metadata: - extractor = cls(path) - return Metadata( - pixel_size=extractor.pixel_size(), - data_processing=extractor.data_processing(), - software=extractor.software(), - ) - - @cached_property - def _etree(self) -> lxml.etree.ElementTree: - return lxml.etree.parse(str(self._imzml_path)) - - def pixel_size(self) -> PixelSize | None: - # TODO delete - warnings.warn("This method is deprecated, use `ParseMetadata.pixel_size` instead", DeprecationWarning) - etree = ElementTree(file=self._imzml_path) - return ParseMetadata(etree).pixel_size - - def data_processing(self) -> list[str]: - # each method will have some child accessions, for now we just parse it all into a flat string - # this should probably be improved in the future!! (but then we will simply upgrade to a different type that - # can hold the extra information, so it should be obvious whether the new code is used) - items = self._etree.findall(f".//{self._ns}processingMethod/{self._ns}cvParam") - return [item.attrib["name"] for item in items] - - def software(self) -> list[str]: - items = self._etree.findall(f".//{self._ns}software/{self._ns}cvParam") - return [item.attrib["name"] for item in items] diff --git a/src/depiction/persistence/imzml/metadata.py b/src/depiction/persistence/imzml/metadata.py new file mode 100644 index 0000000..f616954 --- /dev/null +++ b/src/depiction/persistence/imzml/metadata.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel + +from depiction.persistence.pixel_size import PixelSize + + +class Metadata(BaseModel): + pixel_size: PixelSize + data_processing: list[str] + software: list[str] + ibd_checksums: dict[str, str] diff --git a/src/depiction/persistence/imzml/parser/parse_metadata.py b/src/depiction/persistence/imzml/parser/parse_metadata.py index 19873fd..31576db 100644 --- a/src/depiction/persistence/imzml/parser/parse_metadata.py +++ b/src/depiction/persistence/imzml/parser/parse_metadata.py @@ -1,5 +1,9 @@ +from __future__ import annotations + +from pathlib import Path from xml.etree.ElementTree import ElementTree +from depiction.persistence.imzml.metadata import Metadata from depiction.persistence.pixel_size import PixelSize @@ -8,6 +12,18 @@ def __init__(self, etree: ElementTree) -> None: self._etree = etree self._ns = "{http://psi.hupo.org/ms/mzml}" + @classmethod + def from_file(cls, path: Path) -> ParseMetadata: + return cls(ElementTree(file=path)) + + def parse(self) -> Metadata: + return Metadata( + pixel_size=self.pixel_size, + data_processing=self.data_processing, + software=self.software, + ibd_checksums=self.ibd_checksums, + ) + @property def ibd_checksums(self) -> dict[str, str]: elements = self._etree.findall(f".//{self._ns}fileDescription/{self._ns}fileContent/{self._ns}cvParam") @@ -46,3 +62,16 @@ def pixel_size(self) -> PixelSize | None: return PixelSize(size_x=pixel_size_x, size_y=pixel_size_x, unit=unit) [pixel_size_y] = collect["pixel_size_y"] return PixelSize(size_x=pixel_size_x, size_y=pixel_size_y, unit=unit) + + @property + def data_processing(self) -> list[str]: + # each method will have some child accessions, for now we just parse it all into a flat string + # this should probably be improved in the future!! (but then we will simply upgrade to a different type that + # can hold the extra information, so it should be obvious whether the new code is used) + items = self._etree.findall(f".//{self._ns}processingMethod/{self._ns}cvParam") + return [item.attrib["name"] for item in items] + + @property + def software(self) -> list[str]: + items = self._etree.findall(f".//{self._ns}software/{self._ns}cvParam") + return [item.attrib["name"] for item in items] diff --git a/src/depiction_targeted_preproc/workflow/proc/export_raw_metadata.py b/src/depiction_targeted_preproc/workflow/proc/export_raw_metadata.py index 89e8b35..3ea9b33 100644 --- a/src/depiction_targeted_preproc/workflow/proc/export_raw_metadata.py +++ b/src/depiction_targeted_preproc/workflow/proc/export_raw_metadata.py @@ -5,7 +5,8 @@ from loguru import logger from pydantic import ValidationError -from depiction.persistence.imzml.extract_metadata import ExtractMetadata, Metadata +from depiction.persistence.imzml.metadata import Metadata +from depiction.persistence.imzml.parser.parse_metadata import ParseMetadata from depiction.persistence.pixel_size import PixelSize @@ -14,7 +15,7 @@ def proc_export_raw_metadata( output_json_path: Annotated[Path, typer.Option()], ) -> None: try: - metadata = ExtractMetadata.extract_file(input_imzml_path) + metadata = ParseMetadata.from_file(input_imzml_path).parse() except ValidationError: logger.error("Failed to extract metadata from {input_imzml_path}", input_imzml_path=input_imzml_path) logger.info("Using dummy metadata instead!") diff --git a/src/depiction_targeted_preproc/workflow/vis/images_ome_tiff.py b/src/depiction_targeted_preproc/workflow/vis/images_ome_tiff.py index 54901f5..ccc58f5 100644 --- a/src/depiction_targeted_preproc/workflow/vis/images_ome_tiff.py +++ b/src/depiction_targeted_preproc/workflow/vis/images_ome_tiff.py @@ -4,7 +4,7 @@ from depiction.image import MultiChannelImage from depiction.persistence.format_ome_tiff import OmeTiff -from depiction.persistence.imzml.extract_metadata import Metadata +from depiction.persistence.imzml.metadata import Metadata app = cyclopts.App()