Skip to content

Commit

Permalink
Add support for unsigned masses in otherwise valid Proforma peptides. (
Browse files Browse the repository at this point in the history
…#52)

* Add support for sign missing in proforma string

* Minor refactoring and update

* Bump dependency versions
  • Loading branch information
wfondrie authored May 8, 2024
1 parent 8519369 commit 3ca2297
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 13 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [v0.4.6]
### Added
- Added support for unsigned modification masses that don't quite conform to the Proforma standard.

## [v0.4.5]
### Changed
- The `scan_id` column for parsed spectra is now a string instead of an integer. This is less space efficient, but we ran into issues with Sciex indexing when trying to use only an integer.
Expand Down
42 changes: 30 additions & 12 deletions depthcharge/primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from rdkit.Chem import Draw
from spectrum_utils.spectrum import MsmsSpectrum

from . import utils
from .constants import PROTON

MSKB_TO_UNIMOD = {
Expand Down Expand Up @@ -72,17 +73,7 @@ def __post_init__(self) -> None:
if mod is None:
continue

try:
mod = [MassModification(mod)]
except ValueError:
try:
mod = [GenericModification(mod)]
except (AttributeError, TypeError):
pass
except TypeError:
pass

parsed[idx] = mod
parsed[idx] = [_resolve_mod(m) for m in utils.listify(mod)]

self.modifications = parsed
n_mod = self.modifications[0]
Expand Down Expand Up @@ -116,7 +107,7 @@ def split(self) -> list[str]:
except (AttributeError, ValueError):
modstr = f"[{mods[0].mass:+0.6f}]"
else:
modstr = f"[{sum([m.mass for m in mods]):+0.6f}]"
modstr = f"[{sum(m.mass for m in mods):+0.6f}]"

if not idx:
out.append(f"{modstr}-")
Expand Down Expand Up @@ -444,3 +435,30 @@ def to_tensor(self) -> torch.tensor:
"""
return torch.tensor(np.vstack([self.mz, self.intensity]).T)


def _resolve_mod(
    mod: MassModification | GenericModification | str | float,
) -> MassModification | GenericModification:
    """Coerce a modification into a mass-based or generic modification.

    Parameters
    ----------
    mod : MassModification, GenericModification, str, or float
        The modification to resolve. If it exposes a ``value``
        attribute, that value is used in place of the object itself.

    Returns
    -------
    MassModification or GenericModification
        A ``MassModification`` when the underlying value can be
        converted with ``float()``; otherwise a ``GenericModification``
        built from the value's string form.
    """
    # Unwrap objects carrying a ``value``; plain strings and numbers
    # have no such attribute and are used as-is.
    raw = getattr(mod, "value", mod)

    try:
        # Anything float() accepts (e.g. the unsigned "79.0") is a mass.
        return MassModification(float(raw))
    except ValueError:
        # Not parseable as a number, so fall back to a generic modification.
        return GenericModification(str(raw))
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ dependencies = [
"polars>=0.19.0",
"pyarrow>=12.0.1",
"pylance>=0.7.5",
"pyteomics>=4.4.2",
"pyteomics>=4.7.2",
"numpy>=1.18.1",
"numba>=0.48.0",
"lxml>=4.9.1",
Expand All @@ -33,6 +33,7 @@ dependencies = [
"pillow>=9.4.0",
"spectrum-utils>=0.4.1",
"cloudpathlib>=0.18.1",
"psims>=1.3.3"
]
dynamic = ["version"]

Expand Down
6 changes: 6 additions & 0 deletions tests/unit_tests/test_primitives.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ def test_peptide_init():
assert parsed.split() == expected


def test_almost_proforma():
    """Check that a mass modification without an explicit sign is parsed."""
    expected = ["L", "E", "S[+79.000000]", "L", "I", "E", "K"]
    peptide = Peptide.from_proforma("LES[79.0]LIEK")
    tokens = peptide.split()
    assert tokens == expected


def test_peptide_from_proforma():
"""Test proforma parsing."""
parsed = Peptide.from_proforma("LESLIEK/2")
Expand Down
6 changes: 6 additions & 0 deletions tests/unit_tests/test_tokenizers/test_peptides.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,9 @@ def test_single_peptide():
ion = tokenizer.calculate_precursor_ions("LESLIEK", 2)
expected = mass.fast_mass("LESLIEK", charge=2, ion_type="M")
torch.testing.assert_close(ion, torch.tensor([expected]))


def test_almost_compliant_proform():
    """Check tokenizer creation from a peptide lacking an explicit mass sign."""
    unsigned_peptide = "[10]-EDITHR"
    tok = PeptideTokenizer.from_proforma(unsigned_peptide)
    # The unsigned N-terminal mass should appear with an explicit "+" sign.
    assert "[+10.000000]-" in tok.residues

0 comments on commit 3ca2297

Please sign in to comment.