Skip to content

Commit

Permalink
feat: read mzML in simStitcher
Browse files Browse the repository at this point in the history
  • Loading branch information
jacobomiranda committed Nov 29, 2023
1 parent a15f6b8 commit 698363d
Show file tree
Hide file tree
Showing 2 changed files with 65 additions and 3 deletions.
53 changes: 53 additions & 0 deletions src/tools/peakStrainer/utils/peakStrainer_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ def decode_mzXML_Peaks(encodedPeaks):
Note zmass and intensity are together
"""
if isinstance(encodedPeaks, tuple): # assume its mzml
return decode_mz_ML_Peaks(encodedPeaks[0], encodedPeaks[1])

decoded = b64decode(encodedPeaks)
peaks = array("f", decoded)
Expand All @@ -49,6 +51,21 @@ def decode_mzXML_Peaks(encodedPeaks):
return mass, intens


def decode_mz_ML_Peaks(mass_encoded, inty_encoded):
    """Decode the base64 m/z and intensity arrays of one mzML spectrum.

    mzML keeps m/z and intensity in two separate binary arrays, so each
    payload is decoded on its own.

    Args:
        mass_encoded: base64 payload (str or bytes) of the m/z array.
        inty_encoded: base64 payload (str or bytes) of the intensity array.

    Returns:
        A tuple ``(mass_peaks, inty_peaks)`` of ``array("f")`` objects.

    Raises:
        ValueError: if a decoded payload is not a whole number of 4-byte
            floats (raised by ``array``).

    NOTE(review): the payloads are assumed to be uncompressed 32-bit floats;
    64-bit or zlib-compressed arrays are not handled here -- confirm against
    the files this tool is fed.
    """
    mass_peaks = array("f", b64decode(mass_encoded))
    inty_peaks = array("f", b64decode(inty_encoded))

    # mzML binary data is little-endian (mzXML uses network/big-endian
    # order), so the bytes only need swapping on a big-endian host.
    if sys.byteorder != "little":
        mass_peaks.byteswap()
        inty_peaks.byteswap()

    return mass_peaks, inty_peaks


def ThermoRawfile2Scans_sample(file_path):
log.info("file: %s", file_path)
rawfile = MSFileReader.ThermoRawfile(file_path)
Expand Down Expand Up @@ -145,6 +162,42 @@ def getMZXMLEncondedScans(filePath):
return list(zip(*rawscans))


def _mzml_binary_payload(spectrum, array_name, ns):
    """Return the base64 text of the spectrum's named binary array ("" if absent or empty)."""
    # The cvParam naming the array and the <binary> payload are siblings
    # inside the same <binaryDataArray>, hence the step up via "..".
    binary = spectrum.find(
        f'.//ms:cvParam[@name="{array_name}"]/../ms:binary',
        namespaces=ns,
    )
    if binary is None or binary.text is None:
        return ""
    return binary.text.strip()


def getMZ_MLEncondedScans(filePath):
    """Read the still-encoded scans of an mzML file.

    Args:
        filePath: path (or file object) accepted by ``ET.parse``.

    Returns:
        A list of four tuples, one per column: scan numbers, filter
        strings, ``(mz_base64, intensity_base64)`` pairs and scan start
        times. The list is empty when the file contains no spectrum.
        Scan start times are returned as the raw attribute strings.

    Raises:
        AttributeError: if a spectrum lacks the "filter string" or
            "scan start time" cvParam.
        ValueError: if the last ``=``-separated part of a spectrum id is
            not an integer.
    """
    # TODO: handle different namespaces of mzML
    ns = {
        "mzml": "http://psi.hupo.org/ms/mzml",
        "ms": "http://psi.hupo.org/ms/mzml",
    }

    root = ET.parse(filePath).getroot()

    rawscans = []
    for spectrum in root.findall(".//mzml:spectrum", namespaces=ns):
        # Keep the payloads as base64 text so they can be decoded later.
        encodedPeaks = (
            _mzml_binary_payload(spectrum, "m/z array", ns),
            _mzml_binary_payload(spectrum, "intensity array", ns),
        )
        # The scan number is whatever follows the last "=" of the id,
        # e.g. "... scan=7" -> 7.
        scanNo = int(spectrum.get("id").split("=")[-1])
        filterLine = spectrum.find(
            './/ms:cvParam[@name="filter string"]', namespaces=ns
        ).get("value")
        retTime = spectrum.find(
            './/ms:cvParam[@name="scan start time"]', namespaces=ns
        ).get("value")
        rawscans.append((scanNo, filterLine, encodedPeaks, retTime))

    # Transpose rows into columns.
    return list(zip(*rawscans))


def write2templateMzXML(newfilename, scanPeaks):
namespaces = {
"xmlns": "http://sashimi.sourceforge.net/schema_revision/mzXML_3.0"
Expand Down
15 changes: 12 additions & 3 deletions src/tools/simStitching/simStitcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
log = logging.getLogger(os.path.basename(__file__))
from tools.peakStrainer.utils.peakStrainer_util import (
getMZXMLEncondedScans,
getMZ_MLEncondedScans,
decode_mzXML_Peaks,
write2templateMzXML,
encodePeaks,
Expand All @@ -23,7 +24,7 @@
import numpy as np
import xml.etree.ElementTree as ET
import copy

from pathlib import Path

class Scan(object):
class FilterLine(object):
Expand Down Expand Up @@ -628,7 +629,7 @@ def simStitcher(
filemode="w",
)

log.info("getMZXMLEncondedScans from" + filePath)
log.info("getMZ_MLEncondedScans from" + filePath)
scans_mzxml = getMZXMLEncondedScanRows(filePath)
log.debug("\n".join(map(str, scans_mzxml)))

Expand Down Expand Up @@ -883,7 +884,15 @@ def outputAdjustedFile(fileName):


def getMZXMLEncondedScanRows(filePath):
    """Return the encoded scans of an mzXML or mzML file, one tuple per scan.

    The reader is chosen from the file extension (case-insensitive).

    Args:
        filePath: path of the ``.mzXML`` or ``.mzML`` file to read.

    Returns:
        A list with one tuple per scan. For mzML each tuple is
        ``(scanNo, filterLine, encodedPeaks, retTime)``; the mzXML reader
        presumably yields the same layout -- verify against
        ``getMZXMLEncondedScans``.

    Raises:
        NotImplementedError: if the extension is neither ``.mzXML`` nor
            ``.mzML``.
    """
    suffix = Path(filePath).suffix
    kind = suffix.lower()

    if kind == ".mzxml":
        columns = getMZXMLEncondedScans(filePath)
    elif kind == ".mzml":
        columns = getMZ_MLEncondedScans(filePath)
    else:
        raise NotImplementedError(f" files of type {suffix} are not suppported")

    # Both readers return columns; transpose them back into per-scan rows.
    return list(zip(*columns))


if __name__ == "__main__":
Expand Down

0 comments on commit 698363d

Please sign in to comment.