Skip to content

Commit

Permalink
remove the redundant metadata parser code
Browse files Browse the repository at this point in the history
  • Loading branch information
leoschwarz committed Oct 17, 2024
1 parent 0657dc1 commit 4b02f4b
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 57 deletions.
54 changes: 0 additions & 54 deletions src/depiction/persistence/imzml/extract_metadata.py

This file was deleted.

10 changes: 10 additions & 0 deletions src/depiction/persistence/imzml/metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from pydantic import BaseModel

from depiction.persistence.pixel_size import PixelSize


class Metadata(BaseModel):
    """Validated container for the metadata values read from an imzML file.

    All four fields are required; none declares a default value.
    """

    # Pixel dimensions, expressed with the project's ``PixelSize`` type.
    pixel_size: PixelSize

    # Flat list of data-processing entries, one string per entry.
    data_processing: list[str]

    # Flat list of software entries, one string per entry.
    software: list[str]

    # String-to-string mapping of checksums for the ibd file
    # (presumably keyed by checksum type -- confirm against the parser).
    ibd_checksums: dict[str, str]
29 changes: 29 additions & 0 deletions src/depiction/persistence/imzml/parser/parse_metadata.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from __future__ import annotations

from pathlib import Path
from xml.etree.ElementTree import ElementTree

from depiction.persistence.imzml.metadata import Metadata
from depiction.persistence.pixel_size import PixelSize


Expand All @@ -8,6 +12,18 @@ def __init__(self, etree: ElementTree) -> None:
self._etree = etree
self._ns = "{http://psi.hupo.org/ms/mzml}"

@classmethod
def from_file(cls, path: Path) -> ParseMetadata:
    """Alternate constructor: read the XML document at ``path`` and wrap it.

    :param path: location of the file to load into an ``ElementTree``.
    :return: a new instance of ``cls`` built around the loaded tree.
    """
    loaded_tree = ElementTree(file=path)
    return cls(loaded_tree)

def parse(self) -> Metadata:
    """Gather every parsed property into a single ``Metadata`` record.

    The properties are read in the order pixel size, data processing,
    software, ibd checksums, and handed to ``Metadata`` as keyword arguments.

    NOTE(review): ``pixel_size`` is annotated as possibly returning ``None``
    while ``Metadata.pixel_size`` is not optional, so a file without a pixel
    size presumably fails model validation here -- confirm.

    :return: the populated ``Metadata`` instance.
    """
    collected = {
        "pixel_size": self.pixel_size,
        "data_processing": self.data_processing,
        "software": self.software,
        "ibd_checksums": self.ibd_checksums,
    }
    return Metadata(**collected)

@property
def ibd_checksums(self) -> dict[str, str]:
elements = self._etree.findall(f".//{self._ns}fileDescription/{self._ns}fileContent/{self._ns}cvParam")
Expand Down Expand Up @@ -46,3 +62,16 @@ def pixel_size(self) -> PixelSize | None:
return PixelSize(size_x=pixel_size_x, size_y=pixel_size_x, unit=unit)
[pixel_size_y] = collect["pixel_size_y"]
return PixelSize(size_x=pixel_size_x, size_y=pixel_size_y, unit=unit)

@property
def data_processing(self) -> list[str]:
    """Names of all ``cvParam`` elements found below ``processingMethod`` elements.

    Every processing method may carry additional child accessions; for now
    everything is flattened into plain strings. If richer information is
    needed later, the return type is expected to change to something that can
    hold it, which makes it obvious whether callers use the new code.

    :return: the ``name`` attribute of each matching element, in document order.
    """
    xpath = f".//{self._ns}processingMethod/{self._ns}cvParam"
    return [cv_param.attrib["name"] for cv_param in self._etree.findall(xpath)]

@property
def software(self) -> list[str]:
    """Names of all ``cvParam`` elements found directly below ``software`` elements.

    :return: the ``name`` attribute of each matching element, in document order.
    """
    xpath = f".//{self._ns}software/{self._ns}cvParam"
    return [cv_param.attrib["name"] for cv_param in self._etree.findall(xpath)]
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from loguru import logger
from pydantic import ValidationError

from depiction.persistence.imzml.extract_metadata import ExtractMetadata, Metadata
from depiction.persistence.imzml.metadata import Metadata
from depiction.persistence.imzml.parser.parse_metadata import ParseMetadata
from depiction.persistence.pixel_size import PixelSize


Expand All @@ -14,7 +15,7 @@ def proc_export_raw_metadata(
output_json_path: Annotated[Path, typer.Option()],
) -> None:
try:
metadata = ExtractMetadata.extract_file(input_imzml_path)
metadata = ParseMetadata.from_file(input_imzml_path).parse()
except ValidationError:
logger.error("Failed to extract metadata from {input_imzml_path}", input_imzml_path=input_imzml_path)
logger.info("Using dummy metadata instead!")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from depiction.image import MultiChannelImage
from depiction.persistence.format_ome_tiff import OmeTiff
from depiction.persistence.imzml.extract_metadata import Metadata
from depiction.persistence.imzml.metadata import Metadata

app = cyclopts.App()

Expand Down

0 comments on commit 4b02f4b

Please sign in to comment.