From 3ca2297c63917a022081d13062030d34ae2b4fb9 Mon Sep 17 00:00:00 2001 From: Will Fondrie Date: Tue, 7 May 2024 22:59:38 -0700 Subject: [PATCH] Add support for unsigned masses in otherwise valid Proforma peptides. (#52) * Add support for sign missing in proforma string * Minor refactoring and update * Bump dependency versions --- CHANGELOG.md | 4 ++ depthcharge/primitives.py | 42 +++++++++++++------ pyproject.toml | 3 +- tests/unit_tests/test_primitives.py | 6 +++ .../test_tokenizers/test_peptides.py | 6 +++ 5 files changed, 48 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8be1135..e49edc2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [v0.4.6] +### Added +- Added support for unsigned modification masses that don't quite conform to the Proforma standard. + ## [v0.4.5] ### Changed - The `scan_id` column for parsed spectra is not a sting instead of an integer. This is less space efficient, but we ran into issues with Sciex indexing when trying to use only an integer. diff --git a/depthcharge/primitives.py b/depthcharge/primitives.py index b7b3e3a..d81382b 100644 --- a/depthcharge/primitives.py +++ b/depthcharge/primitives.py @@ -17,6 +17,7 @@ from rdkit.Chem import Draw from spectrum_utils.spectrum import MsmsSpectrum +from . 
import utils from .constants import PROTON MSKB_TO_UNIMOD = { @@ -72,17 +73,7 @@ def __post_init__(self) -> None: if mod is None: continue - try: - mod = [MassModification(mod)] - except ValueError: - try: - mod = [GenericModification(mod)] - except (AttributeError, TypeError): - pass - except TypeError: - pass - - parsed[idx] = mod + parsed[idx] = [_resolve_mod(m) for m in utils.listify(mod)] self.modifications = parsed n_mod = self.modifications[0] @@ -116,7 +107,7 @@ def split(self) -> list[str]: except (AttributeError, ValueError): modstr = f"[{mods[0].mass:+0.6f}]" else: - modstr = f"[{sum([m.mass for m in mods]):+0.6f}]" + modstr = f"[{sum(m.mass for m in mods):+0.6f}]" if not idx: out.append(f"{modstr}-") @@ -444,3 +435,30 @@ def to_tensor(self) -> torch.tensor: """ return torch.tensor(np.vstack([self.mz, self.intensity]).T) + + +def _resolve_mod( + mod: MassModification | GenericModification | str | float, +) -> MassModification | GenericModification: + """Resolve the type of a modification. + + Parameters + ---------- + mod : MassModification, GenericModification, str, or float + The modification to resolve. + + Returns + ------- + MassModification or GenericModification + The best modification for the input type. 
+ """ + try: + mod = mod.value + except AttributeError: + pass + + try: + return MassModification(float(mod)) + except ValueError: + return GenericModification(str(mod)) diff --git a/pyproject.toml b/pyproject.toml index 8d41c1b..8638a95 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,7 +19,7 @@ dependencies = [ "polars>=0.19.0", "pyarrow>=12.0.1", "pylance>=0.7.5", - "pyteomics>=4.4.2", + "pyteomics>=4.7.2", "numpy>=1.18.1", "numba>=0.48.0", "lxml>=4.9.1", @@ -33,6 +33,7 @@ dependencies = [ "pillow>=9.4.0", "spectrum-utils>=0.4.1", "cloudpathlib>=0.18.1", + "psims>=1.3.3" ] dynamic = ["version"] diff --git a/tests/unit_tests/test_primitives.py b/tests/unit_tests/test_primitives.py index 19fb2bc..631d4cd 100644 --- a/tests/unit_tests/test_primitives.py +++ b/tests/unit_tests/test_primitives.py @@ -50,6 +50,12 @@ def test_peptide_init(): assert parsed.split() == expected +def test_almost_proforma(): + """Test a peptide lacking an explicit sign.""" + parsed = Peptide.from_proforma("LES[79.0]LIEK") + assert parsed.split() == ["L", "E", "S[+79.000000]", "L", "I", "E", "K"] + + def test_peptide_from_proforma(): """Test proforma parsing.""" parsed = Peptide.from_proforma("LESLIEK/2") diff --git a/tests/unit_tests/test_tokenizers/test_peptides.py b/tests/unit_tests/test_tokenizers/test_peptides.py index bfd8785..b5f58e9 100644 --- a/tests/unit_tests/test_tokenizers/test_peptides.py +++ b/tests/unit_tests/test_tokenizers/test_peptides.py @@ -109,3 +109,9 @@ def test_single_peptide(): ion = tokenizer.calculate_precursor_ions("LESLIEK", 2) expected = mass.fast_mass("LESLIEK", charge=2, ion_type="M") torch.testing.assert_close(ion, torch.tensor([expected])) + + +def test_almost_compliant_proform(): + """Test initializing with a peptide without an explicit mass sign.""" + tokenizer = PeptideTokenizer.from_proforma("[10]-EDITHR") + assert "[+10.000000]-" in tokenizer.residues