Skip to content

Commit

Permalink
test new metadata parsing
Browse files Browse the repository at this point in the history
  • Loading branch information
leoschwarz committed Oct 17, 2024
1 parent 9d4690e commit 0657dc1
Show file tree
Hide file tree
Showing 9 changed files with 150 additions and 3 deletions.
6 changes: 3 additions & 3 deletions src/depiction/persistence/imzml/parser/parse_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,11 @@ def ibd_checksums(self) -> dict[str, str]:
elements = self._etree.findall(f".//{self._ns}fileDescription/{self._ns}fileContent/{self._ns}cvParam")
checksums = {}
for element in elements:
if element.attrib["accession"] == "MS:1000568":
if element.attrib["accession"] in ("MS:1000568", "IMS:1000090"):
checksums["md5"] = element.attrib["value"].lower()
elif element.attrib["accession"] == "MS:1000569":
elif element.attrib["accession"] in ("MS:1000569", "IMS:1000091"):
checksums["sha1"] = element.attrib["value"].lower()
elif element.attrib["accession"] == "MS:1003151":
elif element.attrib["accession"] in ("MS:1003151", "IMS:1000092"):
checksums["sha256"] = element.attrib["value"].lower()
return checksums

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<mzML version="1.1.0" xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd">
<fileDescription>
<fileContent>
<cvParam accession="IMS:1000030" cvRef="IMS" name="continuous"/>
<cvParam accession="IMS:1000090" cvRef="IMS" name="ibd MD5" value="00000000000111111111112222222222"/>
<cvParam accession="MS:1000294" cvRef="MS" name="mass spectrum"/>
</fileContent>
</fileDescription>
</mzML>
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<mzML version="1.1.0" xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd">
<fileDescription>
<fileContent>
<cvParam accession="IMS:1000030" cvRef="IMS" name="continuous"/>
<cvParam accession="IMS:1000090" cvRef="IMS" name="ibd MD5" value="00000000000111111111112222222222"/>
<cvParam accession="IMS:1000092" cvRef="IMS" name="ibd SHA-256" value="aaaaaa"/>
<cvParam accession="MS:1000294" cvRef="MS" name="mass spectrum"/>
</fileContent>
</fileDescription>
</mzML>
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
<?xml version="1.0" encoding="UTF-8"?>
<mzML version="1.1.0" xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd">
</mzML>
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<mzML version="1.1.0" xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd">
<fileDescription>
<fileContent>
<cvParam accession="IMS:1000030" cvRef="IMS" name="continuous"/>
<cvParam accession="IMS:1000091" cvRef="IMS" name="ibd SHA-1" value="abcdef"/>
<cvParam accession="MS:1000294" cvRef="MS" name="mass spectrum"/>
</fileContent>
</fileDescription>
</mzML>
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<mzML version="1.1.0" xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd">
<fileDescription>
<fileContent>
<cvParam accession="IMS:1000030" cvRef="IMS" name="continuous"/>
<cvParam accession="IMS:1000092" cvRef="IMS" name="ibd SHA-256" value="aaaaaa"/>
<cvParam accession="MS:1000294" cvRef="MS" name="mass spectrum"/>
</fileContent>
</fileDescription>
</mzML>
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<mzML version="1.1.0" xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd">
<cvList count="3">
<cv URI="http://purl.obolibrary.org/obo/ms.obo" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" id="MS" version="4.1.35"/>
<cv URI="http://purl.obolibrary.org/obo/uo.obo" fullName="Unit Ontology" id="UO" version="2019-03-29"/>
<cv URI="http://www.maldi-msi.org/download/imzml/imagingMS.obo" fullName="Imaging MS Ontology" id="IMS" version="0.9.1"/>
</cvList>
<scanSettingsList count="1">
<scanSettings id="scanSettings0">
<cvParam accession="IMS:1000401" cvRef="IMS" name="top down"/>
<cvParam accession="IMS:1000410" cvRef="IMS" name="meandering"/>
<cvParam accession="IMS:1000490" cvRef="IMS" name="linescan right left"/>
<cvParam accession="IMS:1000480" cvRef="IMS" name="horizontal line scan"/>
<cvParam accession="IMS:1000042" cvRef="IMS" name="max count of pixels x" value="212"/>
<cvParam accession="IMS:1000043" cvRef="IMS" name="max count of pixels y" value="270"/>
<cvParam accession="IMS:1000044" cvRef="IMS" name="max dimension x" unitAccession="UO:0000017" unitCvRef="UO" unitName="micrometer" value="10600"/>
<cvParam accession="IMS:1000045" cvRef="IMS" name="max dimension y" unitAccession="UO:0000017" unitCvRef="UO" unitName="micrometer" value="13500"/>
<cvParam accession="IMS:1000053" cvRef="IMS" name="absolute position offset x" unitAccession="UO:0000017" unitCvRef="UO" unitName="micrometer" value="0"/>
<cvParam accession="IMS:1000054" cvRef="IMS" name="absolute position offset y" unitAccession="UO:0000017" unitCvRef="UO" unitName="micrometer" value="0"/>
<cvParam accession="IMS:1000046" cvRef="IMS" name="pixel size x" value="50"/>
<cvParam accession="IMS:1000047" cvRef="IMS" name="pixel size y" value="20"/>
</scanSettings>
</scanSettingsList>
</mzML>
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<mzML version="1.1.0" xmlns="http://psi.hupo.org/ms/mzml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psi.hupo.org/ms/mzml http://psidev.info/files/ms/mzML/xsd/mzML1.1.0.xsd">
<cvList count="3">
<cv URI="http://purl.obolibrary.org/obo/ms.obo" fullName="Proteomics Standards Initiative Mass Spectrometry Ontology" id="MS" version="4.1.35"/>
<cv URI="http://purl.obolibrary.org/obo/uo.obo" fullName="Unit Ontology" id="UO" version="2019-03-29"/>
<cv URI="http://www.maldi-msi.org/download/imzml/imagingMS.obo" fullName="Imaging MS Ontology" id="IMS" version="0.9.1"/>
</cvList>
<scanSettingsList count="1">
<scanSettings id="scanSettings0">
<cvParam accession="IMS:1000401" cvRef="IMS" name="top down"/>
<cvParam accession="IMS:1000410" cvRef="IMS" name="meandering"/>
<cvParam accession="IMS:1000490" cvRef="IMS" name="linescan right left"/>
<cvParam accession="IMS:1000480" cvRef="IMS" name="horizontal line scan"/>
<cvParam accession="IMS:1000042" cvRef="IMS" name="max count of pixels x" value="212"/>
<cvParam accession="IMS:1000043" cvRef="IMS" name="max count of pixels y" value="270"/>
<cvParam accession="IMS:1000044" cvRef="IMS" name="max dimension x" unitAccession="UO:0000017" unitCvRef="UO" unitName="micrometer" value="10600"/>
<cvParam accession="IMS:1000045" cvRef="IMS" name="max dimension y" unitAccession="UO:0000017" unitCvRef="UO" unitName="micrometer" value="13500"/>
<cvParam accession="IMS:1000053" cvRef="IMS" name="absolute position offset x" unitAccession="UO:0000017" unitCvRef="UO" unitName="micrometer" value="0"/>
<cvParam accession="IMS:1000054" cvRef="IMS" name="absolute position offset y" unitAccession="UO:0000017" unitCvRef="UO" unitName="micrometer" value="0"/>
</scanSettings>
</scanSettingsList>
</mzML>
57 changes: 57 additions & 0 deletions tests/integration/imzml_parser/test_parse_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from pathlib import Path
from xml.etree import ElementTree

import pytest

from depiction.persistence.imzml.parser.parse_metadata import ParseMetadata
from depiction.persistence.pixel_size import PixelSize


@pytest.fixture()
def xml_path(request) -> Path:
    """Return the path of the XML fixture selected by the indirect parameter.

    The file is looked up next to this test module, under
    ``chunks/parse_metadata/<request.param>.xml``.
    """
    fixtures_dir = Path(__file__).parent.joinpath("chunks", "parse_metadata")
    return fixtures_dir / f"{request.param}.xml"


@pytest.fixture()
def etree(xml_path: Path) -> ElementTree.ElementTree:
    """Parse the XML fixture file at ``xml_path`` into an ``ElementTree``.

    The path is handed straight to ``ElementTree.parse`` so the parser reads
    the raw bytes and decodes them according to the file's own XML
    declaration. Reading the file with ``Path.read_text()`` and no
    ``encoding`` argument would instead decode it with the platform's default
    text encoding before the parser ever sees it.
    """
    return ElementTree.parse(xml_path)


@pytest.fixture()
def parse_metadata(etree) -> ParseMetadata:
    """Build the ``ParseMetadata`` object under test from the parsed XML tree."""
    parser = ParseMetadata(etree=etree)
    return parser


@pytest.fixture()
def expected_checksums(xml_path) -> dict[str, str]:
    """Return the checksum mapping expected for the selected fixture file.

    The expectation is chosen by the file stem of ``xml_path``.

    Raises:
        NotImplementedError: if no expectation is defined for the stem.
    """
    # Built per call so every test receives its own fresh dictionaries.
    expected_by_stem = {
        "checksums_none": {},
        "checksums_md5": {"md5": "00000000000111111111112222222222"},
        "checksums_sha1": {"sha1": "abcdef"},
        "checksums_sha256": {"sha256": "aaaaaa"},
        "checksums_multiple": {"md5": "00000000000111111111112222222222", "sha256": "aaaaaa"},
    }
    stem = xml_path.stem
    if stem not in expected_by_stem:
        raise NotImplementedError
    return expected_by_stem[stem]


@pytest.mark.parametrize(
    "xml_path",
    [
        "checksums_none",
        "checksums_md5",
        "checksums_sha1",
        "checksums_sha256",
        "checksums_multiple",
    ],
    indirect=True,
)
def test_ibd_checksums(parse_metadata: ParseMetadata, expected_checksums) -> None:
    """Check that ``ibd_checksums`` matches the expectation for each fixture file."""
    actual = parse_metadata.ibd_checksums
    assert actual == expected_checksums


@pytest.mark.parametrize(
    "xml_path",
    ["pixel_size_2d", "pixel_size_none"],
    indirect=True,
)
def test_pixel_size(parse_metadata: ParseMetadata, xml_path: Path) -> None:
    """Check ``pixel_size`` for a fixture with pixel-size params and one without.

    Raises:
        NotImplementedError: if the fixture stem has no expectation defined.
    """
    stem = xml_path.stem
    if stem not in ("pixel_size_2d", "pixel_size_none"):
        raise NotImplementedError

    if stem == "pixel_size_none":
        # This fixture omits the "pixel size x"/"pixel size y" cvParams.
        assert parse_metadata.pixel_size is None
        return

    assert parse_metadata.pixel_size == PixelSize(size_x=50, size_y=20, unit="micrometer")

0 comments on commit 0657dc1

Please sign in to comment.