Skip to content

Commit

Permalink
Updates RaSP to throw an error on invalid inputs (#259)
Browse files Browse the repository at this point in the history
* Updates the error message to make sure the sequence is in alphabet in RaSP

* Adds a regression test, changes the error to a ValueError

* Updates version
  • Loading branch information
miguelgondu authored Sep 17, 2024
1 parent d448f7d commit c7e5e98
Show file tree
Hide file tree
Showing 6 changed files with 51 additions and 26 deletions.
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "poli"
version = "1.0.0.dev8"
version = "1.0.0.dev9"
description = "poli, a library of discrete objective functions"
readme = "README.md"
authors = [{name="Miguel González-Duque", email="[email protected]"}, {name="Simon Bartels"}]
Expand Down Expand Up @@ -53,7 +53,7 @@ profile = "black"
exclude = ["src/poli/core/util/proteins/rasp/inner_rasp", "src/poli/objective_repository/gfp_cbas"]

[tool.bumpversion]
current_version = "1.0.0.dev8"
current_version = "1.0.0.dev9"
parse = """(?x)
(?P<major>0|[1-9]\\d*)\\.
(?P<minor>0|[1-9]\\d*)\\.
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = poli
version = "1.0.0.dev8"
version = "1.0.0.dev9"
author_email = [email protected]
description = Protein Objectives Library
long_description = file: README.md
Expand Down
2 changes: 1 addition & 1 deletion src/poli/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""poli, a library for discrete black-box objective functions."""

__version__ = "1.0.0.dev8"
__version__ = "1.0.0.dev9"
from .core.util.isolation.instancing import instance_function_as_isolated_process

# from .core import get_problems
Expand Down
45 changes: 23 additions & 22 deletions src/poli/core/util/proteins/rasp/rasp_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,28 @@ def __init__(
self.download_cavity_and_downstream_models(
verbose=verbose, verify_integrity_of_download=verify_integrity_of_download
)
self.alphabet = [
"A",
"C",
"D",
"E",
"F",
"G",
"H",
"I",
"K",
"L",
"M",
"N",
"P",
"Q",
"R",
"S",
"T",
"V",
"W",
"Y",
]

def get_and_compile_reduce(self):
"""
Expand Down Expand Up @@ -513,28 +535,7 @@ def create_df_structure(
df_structure_no_mt["pdbid"] = res_info["pdb_id"]
df_structure_no_mt["chainid"] = res_info["chain_id"]
df_structure_no_mt["variant"] = res_info["wt_AA"] + res_info["pos"] + "X"
aa_list = [
"A",
"C",
"D",
"E",
"F",
"G",
"H",
"I",
"K",
"L",
"M",
"N",
"P",
"Q",
"R",
"S",
"T",
"V",
"W",
"Y",
]
aa_list = self.alphabet

# Considering ALL single mutations in ALL sites.
wildtype_residue_string = "".join(
Expand Down
7 changes: 7 additions & 0 deletions src/poli/objective_repository/rasp/isolated_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,13 @@ def parse_pdb_as_residue_strings(self, pdb_file: Path) -> List[str]:
def _compute_mutant_residue_string_ddg(
self, mutant_residue_string: str
) -> np.ndarray:
for i, char in enumerate(mutant_residue_string):
if char not in self.rasp_interface.alphabet:
raise ValueError(
f"Invalid residue {char} at position {i} in the mutant "
"residue string. Please make sure that all residues are "
f"in the alphabet: {self.rasp_interface.alphabet}."
)
try:
(
closest_wildtype_pdb_file,
Expand Down
17 changes: 17 additions & 0 deletions src/poli/tests/registry/proteins/test_rasp.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,20 @@ def test_rasp_penalization_works_on_multiple_inputs():
combination = np.vstack([problem.x0, problematic_x])
y = f(combination)
assert y[-1] == -100.0


@pytest.mark.poli__rasp
def test_rasp_fails_on_invalid_amino_acids():
    """Check that RaSP raises a ValueError on out-of-alphabet residues."""
    problem = objective_factory.create(
        name="rasp",
        wildtype_pdb_path=THIS_DIR / "3ned.pdb",
    )
    f, x0 = problem.black_box, problem.x0

    # "B" is not one of the 20 amino acids in RaSP's alphabet, so
    # evaluating an input that contains it should raise an error
    # instead of returning a score.
    problematic_x = x0.copy()
    problematic_x[0][0] = "B"
    with pytest.raises(ValueError):
        f(problematic_x)

0 comments on commit c7e5e98

Please sign in to comment.