Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] 207 add the work of neidhart et al as a black box #214

Merged
merged 47 commits into from
Jul 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
adc4580
Add RMF objective, logic, info, and factory
Jul 9, 2024
0d29289
add inner_function on init, add module_to_import to inner_function get
Jul 9, 2024
ab9e72a
correction shape and object types
Jul 9, 2024
6dcbde8
add tests availability, isolation, batch_eval
Jul 9, 2024
980de11
added random state as class property
Jul 10, 2024
b3ed7b5
black, linting
Jul 10, 2024
33a0fa3
add test batch eval
Jul 10, 2024
4573492
corrections: compute HD, batched results in loop, OH to int encoding
Jul 12, 2024
71ea6de
add tests seeding and expected values
Jul 12, 2024
3122668
add environment marker
Jul 12, 2024
40851cc
add poli-rmf as testenv
Jul 12, 2024
ae59dfe
add github workflow
Jul 12, 2024
b5f419d
isort
Jul 12, 2024
0745169
move imports into env markers
Jul 12, 2024
902ed22
reblack
Jul 12, 2024
f15139d
Modifies the name of the isolated function
miguelgondu Jul 12, 2024
34e9db1
fix tox test commands
Jul 12, 2024
209a50a
no conda runs but shell invoke
Jul 12, 2024
8b5a399
Uses default observer if none is found in the config file (#221)
miguelgondu Jul 18, 2024
3a5a917
__future__ type annotations
Jul 25, 2024
4dc8d01
Add blackbox docstring
Jul 25, 2024
f0e9ea8
removed deprecated test
Jul 25, 2024
cf1035c
lint
Jul 25, 2024
cffffc3
Add RMF objective, logic, info, and factory
Jul 9, 2024
14cd184
add inner_function on init, add module_to_import to inner_function get
Jul 9, 2024
ad87418
correction shape and object types
Jul 9, 2024
1f48c49
add tests availability, isolation, batch_eval
Jul 9, 2024
ab0f5bc
added random state as class property
Jul 10, 2024
2a86532
black, linting
Jul 10, 2024
9f78d12
add test batch eval
Jul 10, 2024
fad4654
corrections: compute HD, batched results in loop, OH to int encoding
Jul 12, 2024
6e2962f
add tests seeding and expected values
Jul 12, 2024
7c8dacf
add environment marker
Jul 12, 2024
d326622
add poli-rmf as testenv
Jul 12, 2024
a75927a
add github workflow
Jul 12, 2024
5149e06
isort
Jul 12, 2024
bd83ca8
move imports into env markers
Jul 12, 2024
c9f737f
reblack
Jul 12, 2024
d699b85
Modifies the name of the isolated function
miguelgondu Jul 12, 2024
9de1704
fix tox test commands
Jul 12, 2024
695c9ee
no conda runs but shell invoke
Jul 12, 2024
5c8b514
__future__ type annotations
Jul 25, 2024
4cbce6e
Add blackbox docstring
Jul 25, 2024
90903ea
removed deprecated test
Jul 25, 2024
68dc275
lint
Jul 25, 2024
666647c
Merge branch '207-add-the-work-of-neidhart-et-al-as-a-black-box' of g…
Jul 25, 2024
4738787
isort on __future__
Jul 25, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions .github/workflows/python-tox-testing-rmf-env.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Workflow that runs the RMF-related tests through tox (environment poli-rmf-py39).
name: poli rmf (conda, py3.9)

# Triggers: every push, plus a scheduled run (cron '0 0 * * 0' = Sundays at 00:00).
on:
push:
schedule:
- cron: '0 0 * * 0'

jobs:
build-linux:
runs-on: ubuntu-latest
strategy:
max-parallel: 5

steps:
# Check out the repository and pin the interpreter used to launch tox.
- uses: actions/checkout@v3
- name: Set up Python 3.9
uses: actions/setup-python@v3
with:
python-version: '3.9'
# Make the runner's conda binaries available to the later steps.
- name: Add conda to system path
run: |
# $CONDA is an environment variable pointing to the root of the miniconda directory
echo $CONDA/bin >> $GITHUB_PATH
# tox is the only dependency installed directly; the test environment itself is
# selected by the -e flag of the tox call below.
- name: Install dependencies
run: |
python -m pip install tox
- name: Test rmf-related black boxes with tox and pytest
run: |
tox -c tox.ini -e poli-rmf-py39
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ markers = [
"poli__tdc: marks tests that run in the poli__tdc environment",
"poli__protein: marks tests that run in the poli__protein environment",
"poli__rasp: marks tests that run in the poli__rasp environment",
"poli__rmf: marks tests that run in poli__rmf environment",
"unmarked: All other tests, which usually run in the base environment",
]

Expand Down
3 changes: 3 additions & 0 deletions src/poli/objective_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,9 @@ def _instantiate_observer(observer_name: str, quiet: bool = False) -> AbstractOb
The black-box function, initial value, and related information.

"""
if _OBSERVER not in registry.config[_DEFAULT]:
registry.config[_DEFAULT][_OBSERVER] = _DEFAULT_OBSERVER_RUN_SCRIPT

observer_script: str = registry.config[_DEFAULT][_OBSERVER]
if observer_name is not None:
if observer_name != DEFAULT_OBSERVER_NAME:
Expand Down
3 changes: 3 additions & 0 deletions src/poli/objective_repository/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
from .rfp_foldx_stability_and_sasa.register import (
RFPFoldXStabilityAndSASAProblemFactory,
)
from .rmf_landscape.register import RMFBlackBox, RMFProblemFactory
from .sa_tdc.register import SABlackBox, SAProblemFactory
from .scaffold_hop.register import ScaffoldHopBlackBox, ScaffoldHopProblemFactory
from .sitagliptin_mpo.register import (
Expand Down Expand Up @@ -138,6 +139,7 @@
"rdkit_logp": LogPProblemFactory,
"rdkit_qed": QEDProblemFactory,
"rfp_foldx_stability_and_sasa": RFPFoldXStabilityAndSASAProblemFactory,
"rmf_landscape": RMFProblemFactory,
"sa_tdc": SAProblemFactory,
"super_mario_bros": SuperMarioBrosProblemFactory,
"white_noise": WhiteNoiseProblemFactory,
Expand Down Expand Up @@ -182,6 +184,7 @@
"rdkit_logp": LogPBlackBox,
"rdkit_qed": QEDBlackBox,
"rfp_foldx_stability_and_sasa": FoldXStabilityAndSASABlackBox,
"rmf_landscape": RMFBlackBox,
"sa_tdc": SABlackBox,
"super_mario_bros": SuperMarioBrosBlackBox,
"white_noise": WhiteNoiseBlackBox,
Expand Down
10 changes: 10 additions & 0 deletions src/poli/objective_repository/rmf_landscape/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""Rough Mount Fuji (RMF) fitness landscapes w/ tunable ruggedness using Numpy.
See J Neidhart, IG Szendro, J Krug.
Adaptation in Tunably Rugged Fitness Landscapes: The Rough Mount Fuji Model.
Genetics, 2014.
DOI: https://doi.org/10.1534/genetics.114.167668
See also Aita et al.
Analysis of a local fitness landscape with a model of the rough Mt. Fuji-type landscape: Application to prolyl endopeptidase and thermolysin.
Biopolymers, 2000.
DOI: https://doi.org/10.1002/(SICI)1097-0282(200007)54:1<64::AID-BIP70>3.0.CO;2-R
"""
11 changes: 11 additions & 0 deletions src/poli/objective_repository/rmf_landscape/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Conda environment in which the RMF black box is executed.
name: poli__rmf
channels:
- conda-forge
- defaults
dependencies:
# Interpreter and pip are pinned; the remaining packages are installed through pip.
- python=3.9
- pip=23.2.1
- pip:
- numpy
# poli itself is installed from the dev branch of the upstream repository.
- "git+https://github.com/MachineLearningLifeScience/poli.git@dev"
- scipy
15 changes: 15 additions & 0 deletions src/poli/objective_repository/rmf_landscape/information.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import numpy as np

from poli.core.black_box_information import BlackBoxInformation
from poli.core.util.proteins.defaults import AMINO_ACIDS

# Static description of the RMF landscape black box.
rmf_info = BlackBoxInformation(
    name="rmf_landscape",
    # Input space: discrete sequences over the amino-acid alphabet.
    alphabet=AMINO_ACIDS,  # TODO: differentiate between AA and NA inputs?
    discrete=True,
    # Sequence-length constraints: no upper bound, aligned, fixed length.
    max_sequence_length=np.inf,
    aligned=True,
    fixed_length=True,
    # Output properties.
    deterministic=False,
    log_transform_recommended=False,
)
132 changes: 132 additions & 0 deletions src/poli/objective_repository/rmf_landscape/isolated_function.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
from __future__ import annotations

import logging
from random import seed
from typing import List, Optional

import numpy as np
from scipy.spatial.distance import hamming
from scipy.stats import genpareto

from poli.core.abstract_isolated_function import AbstractIsolatedFunction
from poli.core.util.proteins.defaults import AMINO_ACIDS, ENCODING


class RMFIsolatedLogic(AbstractIsolatedFunction):
    """
    Rough Mount Fuji (RMF) internal logic.

    The value of a sequence is the wildtype value, minus ``c`` times the
    Hamming distance between the sequence and the wildtype, plus a random
    term drawn from a generalized Pareto distribution with shape ``kappa``.

    Parameters
    ----------
    wildtype : List[str]
        Reference sequence. Anything that is not already a numpy array is
        converted with ``np.array(list(wildtype))``. Every element has to
        be a key of ``ENCODING``.
    wt_val : float, optional
        Value assigned to the wildtype (offset of the landscape),
        by default 0.0.
    c : float, optional
        Non-negative slope of the additive (Hamming distance) term.
        If None, ``1 / (len(alphabet) - 1)`` is used.
    kappa : float, optional
        Shape parameter of the generalized Pareto distribution the random
        term is drawn from, by default 0.1.
    alphabet : List[str], optional
        Alphabet for the problem, by default the amino-acid list provided
        by poli.core.util.proteins.defaults.
    seed : int, optional
        Seed of the numpy random generator used for the random term,
        by default 0.

    Attributes
    ----------
    theta : float
        ``c / sqrt(Var(eta))``, where ``eta`` is the generalized Pareto
        random term with shape ``kappa``.

    Methods
    -------
    f(f0, sigma, sigma_star, c, kappa, rand_state)
        Value of a single integer-encoded sequence.
    __call__(x, context=None)
        Values of a batch of sequences relative to the wildtype.

    Raises
    ------
    AssertionError
        If no wildtype sequence is provided, if the wildtype contains
        elements outside of the encoding, or if ``c`` is negative.
    """

    def __init__(
        self,
        wildtype: List[str],
        wt_val: float | None = 0.0,
        c: float | None = None,
        kappa: float | None = 0.1,
        alphabet: List[str] | None = None,
        seed: int | None = 0,
    ) -> None:
        """
        Initialize the RMF logic: encode the wildtype, fix the landscape
        parameters and create the random generator.
        """
        assert wildtype is not None, (
            "Missing reference input sequence. "
            "Did you forget to pass it to the create of the black box?"
        )
        if not isinstance(wildtype, np.ndarray):
            wildtype = np.array(list(wildtype))
        self.wildtype = wildtype
        self.seed = seed
        if alphabet is None:
            logging.info("using default alphabet AAs.")
            alphabet = AMINO_ACIDS
        assert all(
            aa in ENCODING for aa in wildtype
        ), "Input wildtype elements not in encoding alphabet."
        # Integer encoding of the wildtype, compared position-wise in f().
        self.wt_int = np.array([ENCODING.get(aa) for aa in wildtype])
        if c is None:
            c = 1 / (len(alphabet) - 1)
        # c == 0 is accepted (no additive term), only negative slopes are not.
        assert c >= 0, "Invalid c : c >= 0 required!"
        logging.info(f"setting c={c}")
        self.c = c
        self.kappa = kappa
        self.f_0 = (
            wt_val  # in case of standardized observations (around WT) assume w.l.o.g.
        )
        self.alphabet = alphabet
        # The random term is sampled with shape kappa (see f), so its
        # variance has to be computed for that same distribution.
        eta_var = genpareto.stats(kappa, moments="v")
        self.theta = c / np.sqrt(eta_var)
        self.rng = np.random.default_rng(seed)
        logging.info(f"landscape theta={self.theta}")
        super().__init__()

    @staticmethod
    def f(
        f0: float,
        sigma: np.ndarray,
        sigma_star: np.ndarray,
        c: float,
        kappa: float,
        rand_state,
    ) -> np.ndarray:
        """
        Compute the RMF value of one encoded sequence.

        Parameters
        ----------
        f0 : float
            Value of the reference sequence.
        sigma : np.ndarray
            Integer-encoded query sequence.
        sigma_star : np.ndarray
            Integer-encoded reference sequence.
        c : float
            Slope applied to the Hamming distance.
        kappa : float
            Shape parameter of the generalized Pareto random term.
        rand_state
            Random state handed to ``genpareto.rvs``.

        Returns
        -------
        np.ndarray
            Array of length 1 holding ``f0 - c * d(sigma, sigma_star) + eta``.
        """
        # Additive term: un-normalized Hamming distance. scipy's hamming()
        # is normalized by the sequence length and must not be used here.
        hamm_dist = np.sum(sigma != sigma_star)
        # Non-additive term: a single generalized Pareto draw per evaluation.
        eta = genpareto.rvs(kappa, size=1, random_state=rand_state)
        return f0 - c * hamm_dist + eta

    def __call__(self, x: np.ndarray, context=None) -> np.ndarray:
        """
        Evaluate a batch of sequences.

        Parameters
        ----------
        x : np.ndarray
            Batch of sequences; every sequence must have the same length as
            the wildtype.
        context : optional
            Not used by this function.

        Returns
        -------
        np.ndarray
            Column vector of shape (len(x), 1) with one value per sequence.

        Raises
        ------
        AssertionError
            If a sequence does not have the length of the wildtype.
        """
        wt_length = self.wildtype.shape[-1]
        values = []
        # One random draw per sequence, in batch order, from the shared generator.
        for sequence in x:
            assert len(sequence) == wt_length, "Inconsistent length: undefined."
            x_int = np.array([ENCODING.get(aa) for aa in sequence])
            values.append(
                self.f(
                    f0=self.f_0,
                    sigma=x_int,
                    sigma_star=self.wt_int,
                    c=self.c,
                    kappa=self.kappa,
                    rand_state=self.rng,
                )
            )
        return np.array(values).reshape(-1, 1)


if __name__ == "__main__":
    # Script entry point: registers RMFIsolatedLogic as an isolated function
    # under the name and conda environment given below.
    from poli.core.registry import register_isolated_function as _register

    _registration_options = {
        "name": "rmf_landscape__isolated",
        "conda_environment_name": "poli__rmf",
        "force": True,
    }
    _register(RMFIsolatedLogic, **_registration_options)
Loading
Loading