From 5735e981e28ef417fed53511c8ac9622c25a621a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Gonz=C3=A1lez=20Duque?= Date: Tue, 25 Jun 2024 15:00:19 +0200 Subject: [PATCH 01/10] Updates the test for dockstring (#204) --- src/poli/tests/docs_examples/test_objective_functions.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/poli/tests/docs_examples/test_objective_functions.py b/src/poli/tests/docs_examples/test_objective_functions.py index 9d99d4a1..878709f6 100644 --- a/src/poli/tests/docs_examples/test_objective_functions.py +++ b/src/poli/tests/docs_examples/test_objective_functions.py @@ -142,7 +142,13 @@ def test_dockstring_example(): # Querying: y = f(x) print(y) # Should be 11.9 - assert np.isclose(y, 11.9).all() + + # As of 25/06/2024, the value changed from 11.9 to 11.8. + # Several potential culprits here: RDKit being modified + # to accommodate numpy 2.0, or maybe OpenBabel... + + # An issue will be raised on DockString's repository. + assert np.isclose(y, 11.9, atol=1e-1).all() def test_drd3_docking_example(): From ac0645987aba765f7e77543498dc249375e07779 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Gonz=C3=A1lez=20Duque?= Date: Tue, 25 Jun 2024 15:15:23 +0200 Subject: [PATCH 02/10] Bump version 0.1.0 -> 0.2.0 --- CITATION.cff | 2 +- pyproject.toml | 2 +- setup.cfg | 2 +- src/__init__.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 7ecf9ef6..9712353d 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -8,6 +8,6 @@ authors: - family-names: "Michael" given-names: "Richard" title: "poli: a libary of discrete sequence objectives" -version: 0.1.0 +version: 0.2.0 date-released: 2024-01-23 url: "https://github.com/MachineLearningLifeScience/poli" \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 2edf6c29..a44d36c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = 
"poli" -version = "0.1.0" +version = "0.2.0" description = "poli, a library of discrete objective functions" readme = "README.md" authors = [{name="Miguel González-Duque", email="miguelgondu@gmail.com"}, {name="Simon Bartels"}] diff --git a/setup.cfg b/setup.cfg index 85735b9b..51934c83 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = poli -version = 0.1.0 +version = 0.2.0 author_email = bartels@di.ku.dk description = Protein Objectives Library long_description = file: README.md diff --git a/src/__init__.py b/src/__init__.py index d5b431ce..f7559e77 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,2 +1,2 @@ __author__ = "Miguel González-Duque, Simon Bartels and Richard Michael" -__version__ = "0.1.0" +__version__ = "0.2.0" From 10b6cff1b1a93cd9b1148080a3bde93bd1c906c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Gonz=C3=A1lez=20Duque?= Date: Tue, 25 Jun 2024 15:29:30 +0200 Subject: [PATCH 03/10] bump version: 0.2.0 -> 0.2.1 before tag --- CITATION.cff | 2 +- pyproject.toml | 2 +- setup.cfg | 2 +- src/__init__.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 9712353d..0099dd69 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -8,6 +8,6 @@ authors: - family-names: "Michael" given-names: "Richard" title: "poli: a libary of discrete sequence objectives" -version: 0.2.0 +version: 0.2.1 date-released: 2024-01-23 url: "https://github.com/MachineLearningLifeScience/poli" \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index a44d36c7..f479db96 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "poli" -version = "0.2.0" +version = "0.2.1" description = "poli, a library of discrete objective functions" readme = "README.md" authors = [{name="Miguel González-Duque", email="miguelgondu@gmail.com"}, {name="Simon Bartels"}] diff --git a/setup.cfg b/setup.cfg index 51934c83..ad7d8ae3 100644 --- 
a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = poli -version = 0.2.0 +version = 0.2.1 author_email = bartels@di.ku.dk description = Protein Objectives Library long_description = file: README.md diff --git a/src/__init__.py b/src/__init__.py index f7559e77..b60d2cad 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,2 +1,2 @@ __author__ = "Miguel González-Duque, Simon Bartels and Richard Michael" -__version__ = "0.2.0" +__version__ = "0.2.1" From 52d0dd80432b62a234270f82e4296e25261c516a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Gonz=C3=A1lez=20Duque?= Date: Thu, 11 Jul 2024 15:42:50 +0200 Subject: [PATCH 04/10] Adds Ehrlich functions as black boxes (#213) * Adds a first version of the Ehrlich function proposed by Stanton et al * Adds seeds for all functions in Ehrlich * Adds tests for motif matching, seeding and optimal sequence creation * Adds problem factory and associated tests * Adds docstrings * Addresses RMs comments --- src/poli/objective_repository/__init__.py | 5 + .../objective_repository/ehrlich/__init__.py | 0 .../ehrlich/_construct_feasibility_matrix.py | 63 +++ .../ehrlich/information.py | 15 + .../objective_repository/ehrlich/register.py | 451 ++++++++++++++++++ .../toy_discrete_problems/__init__.py | 0 .../toy_discrete_problems/test_ehrlich.py | 166 +++++++ 7 files changed, 700 insertions(+) create mode 100644 src/poli/objective_repository/ehrlich/__init__.py create mode 100644 src/poli/objective_repository/ehrlich/_construct_feasibility_matrix.py create mode 100644 src/poli/objective_repository/ehrlich/information.py create mode 100644 src/poli/objective_repository/ehrlich/register.py create mode 100644 src/poli/tests/registry/toy_discrete_problems/__init__.py create mode 100644 src/poli/tests/registry/toy_discrete_problems/test_ehrlich.py diff --git a/src/poli/objective_repository/__init__.py b/src/poli/objective_repository/__init__.py index e6ba21c8..66debbe2 100644 --- a/src/poli/objective_repository/__init__.py +++ 
b/src/poli/objective_repository/__init__.py @@ -11,6 +11,9 @@ ToyContinuousBlackBox, ) +# Discrete toy examples +from .ehrlich.register import EhrlichBlackBox, EhrlichProblemFactory + # Chemistry black boxes from .dockstring.register import DockstringProblemFactory, DockstringBlackBox from .drd3_docking.register import DRD3ProblemFactory, DRD3BlackBox @@ -142,6 +145,7 @@ AVAILABLE_PROBLEM_FACTORIES = { "aloha": AlohaProblemFactory, + "ehrlich": EhrlichProblemFactory, "dockstring": DockstringProblemFactory, "drd3_docking": DRD3ProblemFactory, "foldx_rfp_lambo": FoldXRFPLamboProblemFactory, @@ -185,6 +189,7 @@ AVAILABLE_BLACK_BOXES = { "aloha": AlohaBlackBox, + "ehrlich": EhrlichBlackBox, "dockstring": DockstringBlackBox, "drd3_docking": DRD3BlackBox, "foldx_rfp_lambo": FoldXRFPLamboBlackBox, diff --git a/src/poli/objective_repository/ehrlich/__init__.py b/src/poli/objective_repository/ehrlich/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/poli/objective_repository/ehrlich/_construct_feasibility_matrix.py b/src/poli/objective_repository/ehrlich/_construct_feasibility_matrix.py new file mode 100644 index 00000000..e6600948 --- /dev/null +++ b/src/poli/objective_repository/ehrlich/_construct_feasibility_matrix.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +import numpy as np + + +def _construct_banded_matrix(size: int) -> np.ndarray: + """ + Constructs a matrix of zeroes and ones, where + the ones are bands that can loop around. 
+ """ + matrix = np.zeros((size, size), dtype=int) + band_index = 0 + band_length = size - 1 + for row_i in range(size): + indices_for_positions_that_will_be_1 = list( + range(band_index, band_index + band_length) + ) + + # Looping the ones that go over the limit + for i in range(len(indices_for_positions_that_will_be_1)): + if indices_for_positions_that_will_be_1[i] >= size: + indices_for_positions_that_will_be_1[i] -= size + + matrix[row_i, indices_for_positions_that_will_be_1] = 1 + + band_index += 1 + + return matrix + + +def _construct_binary_mask(size: int) -> np.ndarray: + banded_matrix = _construct_banded_matrix(size) + + # Shuffle its rows + random_indices_for_rows = np.random.permutation(size) + binary_mask_matrix = banded_matrix[random_indices_for_rows] + + # Making sure that the diagonal is full + # of ones + binary_mask_matrix[np.diag_indices(size)] = 1 + + return binary_mask_matrix + + +def _construct_transition_matrix(size: int, seed: int | None = None) -> np.ndarray: + binary_mask_matrix = _construct_binary_mask(size) + + # Creating a random state and matrix + random_state = np.random.RandomState(seed) + random_matrix = random_state.randn(size, size) + + # Softmax it + transition_matrix = np.exp(random_matrix) / np.sum(np.exp(random_matrix), axis=0) + + # Mask it + masked_transition_matrix = transition_matrix * binary_mask_matrix + + # Normalize it + normalized_transition_matrix = masked_transition_matrix / np.sum( + masked_transition_matrix, axis=1, keepdims=True + ) + + return normalized_transition_matrix diff --git a/src/poli/objective_repository/ehrlich/information.py b/src/poli/objective_repository/ehrlich/information.py new file mode 100644 index 00000000..45e4032a --- /dev/null +++ b/src/poli/objective_repository/ehrlich/information.py @@ -0,0 +1,15 @@ +import numpy as np + +from poli.core.black_box_information import BlackBoxInformation + +ehrlich_info = BlackBoxInformation( + name="ehrlich", + max_sequence_length=np.inf, + aligned=True, + 
fixed_length=True, + deterministic=True, # ? + alphabet=None, # TODO: add alphabet once we settle for one for SMILES/SELFIES. + log_transform_recommended=False, + discrete=True, + padding_token="", +) diff --git a/src/poli/objective_repository/ehrlich/register.py b/src/poli/objective_repository/ehrlich/register.py new file mode 100644 index 00000000..70e89bb2 --- /dev/null +++ b/src/poli/objective_repository/ehrlich/register.py @@ -0,0 +1,451 @@ +""" +This module implements Ehrlich functions as black boxes in poli. + +Ehrlich functions were proposed by Stanton et al. [1] as a quick-and-easy +alternative for testing discrete sequence optimizers (with protein +optimization in mind). They are devised to + +(i) be easy to query, +(ii) have feasible and unfeasible sequences, +(iii) have uninformative random samples (i.e. randomly sampling + and evaluating should not be competitive, as many of these + should be unfeasible). +(iv) be maximized when certain motifs are present in the sequence. + These motifs can be long-range within the sequence, and are + meant to be non-additive. + +Check the references for details on the implementation. + +References +---------- +[1] Stanton, S., Alberstein, R., Frey, N., Watkins, A., & Cho, K. (2024). + Closed-Form Test Functions for Biophysical Sequence Optimization Algorithms. + arXiv preprint arXiv:2407.00236. 
https://arxiv.org/abs/2407.00236 +""" + +from __future__ import annotations + +import numpy as np + +from poli.core.abstract_black_box import AbstractBlackBox +from poli.core.abstract_problem_factory import AbstractProblemFactory + +from poli.core.black_box_information import BlackBoxInformation +from poli.core.problem import Problem +from poli.objective_repository.ehrlich._construct_feasibility_matrix import ( + _construct_transition_matrix, +) +from poli.objective_repository.ehrlich.information import ( + ehrlich_info, +) + +from poli.core.util.seeding import seed_python_numpy_and_torch + +from poli.core.util.proteins.defaults import AMINO_ACIDS + + +class EhrlichBlackBox(AbstractBlackBox): + """ + Ehrlich functions were proposed by Stanton et al. [1] as a quick-and-easy + alternative for testing discrete sequence optimizers (with protein + optimization in mind). They are devised to + + (i) be easy to query, + (ii) have feasible and unfeasible sequences, + (iii) have uninformative random samples (i.e. randomly sampling + and evaluating should not be competitive, as many of these + should be unfeasible). + (iv) be maximized when certain motifs are present in the sequence. + These motifs can be long-range within the sequence, and are + meant to be non-additive. + + Check the references for details on the implementation. + + Parameters + ---------- + sequence_length : int + The length of the sequence to be optimized. This length is fixed, and + _only_ sequences of this length are considered. + motif_length : int + The length of the motifs. + n_motifs : int + The number of motifs. + quantization : int, optional + The quantization parameter. This parameter must be between 1 and the + motif length, and the motif length must be divisible by the quantization. + By default, it is None (which corresponds to the motif length). + seed : int, optional + The seed for the random number generator. By default, it is None + (i.e. no seed is set). 
+ alphabet : list of str, optional + The alphabet to be used for the sequences. By default, it is the + list of 20 amino acids. + batch_size : int, optional + The batch size for the black box. By default, it is None (i.e. all + sequences are evaluated in a vectorized way). + parallelize : bool, optional + Whether to parallelize the evaluation of the black box. By default, + it is False. + num_workers : int, optional + The number of processors used in parallelization. + evaluation_budget : int, optional + The evaluation budget for the black box. By default, it is infinite. + + References + ---------- + [1] Stanton, S., Alberstein, R., Frey, N., Watkins, A., & Cho, K. (2024). + Closed-Form Test Functions for Biophysical Sequence Optimization Algorithms. + arXiv preprint arXiv:2407.00236. https://arxiv.org/abs/2407.00236 + + """ + + def __init__( + self, + sequence_length: int, + motif_length: int, + n_motifs: int, + quantization: int | None = None, + seed: int = None, + alphabet: list[str] = AMINO_ACIDS, + batch_size: int = None, + parallelize: bool = False, + num_workers: int = None, + evaluation_budget: int = float("inf"), + ): + super().__init__(batch_size, parallelize, num_workers, evaluation_budget) + self.alphabet = alphabet + self.sequence_length = sequence_length + + if seed is not None: + seed_python_numpy_and_torch(seed) + + if motif_length * n_motifs > sequence_length: + raise ValueError( + "The total length of the motifs is greater than the sequence length." + ) + + if quantization is None: + quantization = motif_length + + if not (1 <= quantization <= motif_length) or motif_length % quantization != 0: + raise ValueError( + "The quantization parameter must be between 1 and the motif length, " + "and the motif length must be divisible by the quantization." 
+ ) + + self.motif_length = motif_length + self.n_motifs = n_motifs + self.quantization = quantization + + self.transition_matrix = _construct_transition_matrix( + size=len(alphabet), + seed=seed, + ) + + self.motifs = self.construct_random_motifs( + motif_length=motif_length, + n_motifs=n_motifs, + seed=seed, + ) + self.offsets = self.construct_random_offsets( + motif_length=motif_length, + n_motifs=n_motifs, + seed=seed, + ) + + def _sample_random_sequence( + self, + length: int | None = None, + random_state: int | np.random.RandomState | None = None, + repeating_allowed: bool = True, + ) -> str: + """ + Uses the sparse transition matrix to generate a random sequence + of a given length. + """ + if length is None: + length = self.sequence_length + + if random_state is None: + random_state = np.random.RandomState() + + if isinstance(random_state, int): + random_state = np.random.RandomState(random_state) + elif isinstance(random_state, np.random.RandomState): + pass + else: + raise ValueError( + "The random_state parameter must be an integer or an instance of " + "np.random.RandomState." + ) + + sequence = self.alphabet[random_state.randint(len(self.alphabet))] + current_state = self.alphabet.index(sequence) + + for _ in range(length - 1): + next_state = random_state.choice( + len(self.alphabet), p=self.transition_matrix[current_state] + ) + if not repeating_allowed: + while next_state == current_state: + next_state = random_state.choice( + len(self.alphabet), + p=self.transition_matrix[current_state], + ) + sequence += self.alphabet[next_state] + current_state = next_state + + return sequence + + def _is_feasible(self, sequence: str | np.ndarray) -> bool: + """ + Checks whether a sequence (str or array of one sequence) is feasible + under the transition matrix. This is done by looping through + the sequence and determining whether the transition probabilities + are non-zero. 
+ """ + if isinstance(sequence, np.ndarray): + assert sequence.ndim == 1 or sequence.shape[0] == 1 + sequence = "".join(sequence.flatten()) + + current_state = self.alphabet.index(sequence[0]) + for i in range(1, len(sequence)): + next_state = self.alphabet.index(sequence[i]) + + if np.isclose(self.transition_matrix[current_state, next_state], 0.0): + return False + current_state = next_state + + return True + + def construct_random_motifs( + self, motif_length: int, n_motifs: int, seed: int = None + ) -> np.ndarray: + """ + Creates a given number of random motifs of a certain length. + """ + assert motif_length * n_motifs <= self.sequence_length + + random_state = np.random.RandomState(seed) + + # Sampling a sequence of length motif_length * n_motifs + sequence = self._sample_random_sequence( + length=motif_length * n_motifs, + random_state=random_state, + repeating_allowed=False, + ) + + # Chunking it into n_motifs + motifs = np.array( + [ + list(sequence[i * motif_length : (i + 1) * motif_length]) + for i in range(0, n_motifs) + ] + ) + + return motifs + + def construct_random_offsets( + self, + motif_length: int, + n_motifs: int, + seed: int = None, + ) -> np.ndarray: + """ + Creates a given number of random offsets for the motifs. 
+ """ + all_motifs_length = motif_length * n_motifs + + # For each motif, we sample weights in the simplex + # from a uniform dirichlet + random_state = np.random.RandomState(seed) + + offsets = [] + for _ in range(n_motifs): + weights = random_state.dirichlet(np.ones(motif_length - 1)) + _offset_for_motif = [0] + for weight in weights: + _offset_for_motif.append( + 1 + + np.floor( + weight * (self.sequence_length - all_motifs_length) // n_motifs + ) + ) + + offset_for_motif = np.cumsum(np.array(_offset_for_motif, dtype=int)) + + offsets.append(offset_for_motif) + + return np.array(offsets) + + def construct_optimal_solution( + self, motifs: np.ndarray | None = None, offsets: np.ndarray | None = None + ) -> np.ndarray: + """ + Constructs an optimal solution for a given set of motifs and offsets. + + If None are provided, then the motifs and offsets of the black box + are used. + """ + if motifs is None: + motifs = self.motifs + + if offsets is None: + offsets = self.offsets + + # de-cumsum the offsets + offsets = np.diff(offsets, prepend=0) + optimal_sequence = [] + + for motif, offset in zip(motifs, offsets): + # We write first l - 1 characters according to the offsets, + # and wait to write the last character at the end + for character, next_offset_value in zip(motif, offset[1:]): + # Put the current character in the current position all the way through just before the offset + optimal_sequence += [character] * next_offset_value + + # Write the last character + optimal_sequence += [motif[-1]] + + # We pad until the sequence length with the last character + # of the last motif + optimal_sequence += [motifs[-1][-1]] * ( + self.sequence_length - len(optimal_sequence) + ) + + return np.array(optimal_sequence).reshape(1, -1) + + def _maximal_motif_matches( + self, sequence: np.ndarray, motif: np.ndarray, offset: np.ndarray + ) -> int: + """ + Counts the maximal motif match. 
+ """ + assert sequence.ndim == 1 or sequence.shape[0] == 1 + sequence = "".join(sequence.flatten()) + maximal_match = 0 + for seq_idx in range(len(sequence) - max(offset)): + matches = 0 + sequence_at_offset = np.array( + [sequence[seq_idx + offset_value] for offset_value in offset] + ) + matches = sum(sequence_at_offset == motif) + + maximal_match = max(maximal_match, matches) + + return maximal_match + + def _black_box(self, x: np.ndarray, context=None) -> np.ndarray: + """ + Evaluates the sequences in x by checking maximal matches and multiplying. + """ + values = [] + for sequence in x: + if not self._is_feasible(sequence): + values.append(-np.inf) + continue + + value = 1.0 + for motif, offset in zip(self.motifs, self.offsets): + maximal_matches = self._maximal_motif_matches(sequence, motif, offset) + value *= ( + maximal_matches // (self.motif_length / self.quantization) + ) / self.quantization + + values.append(value) + + return np.array(values).reshape(-1, 1) + + @staticmethod + def get_black_box_info() -> BlackBoxInformation: + return ehrlich_info + + +class EhrlichProblemFactory(AbstractProblemFactory): + """ + A factory for creating Ehrlich functions and initial conditions. + + References + ---------- + [1] Stanton, S., Alberstein, R., Frey, N., Watkins, A., & Cho, K. (2024). + Closed-Form Test Functions for Biophysical Sequence Optimization Algorithms. + arXiv preprint arXiv:2407.00236. 
https://arxiv.org/abs/2407.00236 + """ + + def __init__(self) -> None: + super().__init__() + + def get_setup_information(self) -> BlackBoxInformation: + return ehrlich_info + + def create( + self, + sequence_length: int, + motif_length: int, + n_motifs: int, + quantization: int | None = None, + seed: int = None, + alphabet: list[str] = AMINO_ACIDS, + batch_size: int = None, + parallelize: bool = False, + num_workers: int = None, + evaluation_budget: int = float("inf"), + ) -> Problem: + """ + Creates an Ehrlich function problem (containing an Ehrlich black box and + an initial condition). + + Parameters + ---------- + sequence_length : int + The length of the sequence to be optimized. This length is fixed, and + _only_ sequences of this length are considered. + motif_length : int + The length of the motifs. + n_motifs : int + The number of motifs. + quantization : int, optional + The quantization parameter. This parameter must be between 1 and the + motif length, and the motif length must be divisible by the quantization. + By default, it is None (which corresponds to the motif length). + seed : int, optional + The seed for the random number generator. By default, it is None + (i.e. no seed is set). + alphabet : list of str, optional + The alphabet to be used for the sequences. By default, it is the + list of 20 amino acids. + batch_size : int, optional + The batch size for the black box. By default, it is None (i.e. all + sequences are evaluated in a vectorized way). + parallelize : bool, optional + Whether to parallelize the evaluation of the black box. By default, + it is False. + num_workers : int, optional + The number of processors used in parallelization. + evaluation_budget : int, optional + The evaluation budget for the black box. By default, it is infinite. + + References + ---------- + [1] Stanton, S., Alberstein, R., Frey, N., Watkins, A., & Cho, K. (2024). + Closed-Form Test Functions for Biophysical Sequence Optimization Algorithms. 
+ arXiv preprint arXiv:2407.00236. https://arxiv.org/abs/2407.00236 + """ + if seed is not None: + seed_python_numpy_and_torch(seed) + + f = EhrlichBlackBox( + sequence_length=sequence_length, + motif_length=motif_length, + n_motifs=n_motifs, + quantization=quantization, + seed=seed, + alphabet=alphabet, + batch_size=batch_size, + parallelize=parallelize, + num_workers=num_workers, + evaluation_budget=evaluation_budget, + ) + x0 = np.array([list(f._sample_random_sequence())]) + + return Problem(f, x0) diff --git a/src/poli/tests/registry/toy_discrete_problems/__init__.py b/src/poli/tests/registry/toy_discrete_problems/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/poli/tests/registry/toy_discrete_problems/test_ehrlich.py b/src/poli/tests/registry/toy_discrete_problems/test_ehrlich.py new file mode 100644 index 00000000..f21415c0 --- /dev/null +++ b/src/poli/tests/registry/toy_discrete_problems/test_ehrlich.py @@ -0,0 +1,166 @@ +""" +This is a suite of tests for the Ehrlich functions +proposed by Stanton et al. (2024). 
+""" + +import numpy as np + +import pytest + +from poli.objective_repository.ehrlich._construct_feasibility_matrix import ( + _construct_transition_matrix, +) +from poli.repository import EhrlichBlackBox, EhrlichProblemFactory + + +@pytest.mark.parametrize("size", [3, 5, 8, 10]) +@pytest.mark.parametrize("seed", [1, 2, 3, 4]) +def test_sparse_matrix_construction_is_ergodic_and_aperiodic(size: int, seed: int): + sparse_transition_matrix = _construct_transition_matrix(size, seed=seed) + + # Checking with the Perron-Frobenius condition + assert ( + np.linalg.matrix_power(sparse_transition_matrix, (size - 1) ** 2 + 1) > 0.0 + ).all() + + +@pytest.mark.parametrize("seed", [1, 4]) +@pytest.mark.parametrize("sequence_length", [10, 20, 50, 100]) +@pytest.mark.parametrize("motif_length", [2, 3, 4, 5]) +@pytest.mark.parametrize("n_motifs", [1, 2, 3, 4]) +def test_ehrlich_motifs_and_offsets_are_deterministic( + seed: int, sequence_length: int, motif_length: int, n_motifs: int +): + if n_motifs * motif_length > sequence_length: + pytest.skip( + "The total length of the motifs is greater than the sequence length." + ) + ehrlich_1 = EhrlichBlackBox( + sequence_length=sequence_length, + motif_length=motif_length, + n_motifs=n_motifs, + seed=seed, + ) + ehrlich_2 = EhrlichBlackBox( + sequence_length=sequence_length, + motif_length=motif_length, + n_motifs=n_motifs, + seed=seed, + ) + + assert np.all(ehrlich_1.motifs == ehrlich_2.motifs) + assert np.all(ehrlich_1.offsets == ehrlich_2.offsets) + + +@pytest.mark.parametrize("sequence_length", [10, 20, 50, 100]) +@pytest.mark.parametrize("motif_length", [2, 3, 4, 5]) +@pytest.mark.parametrize("n_motifs", [1, 2, 3, 4]) +def test_ehrlich_gives_different_motifs_for_different_seeds( + sequence_length, motif_length, n_motifs +): + if n_motifs * motif_length > sequence_length: + pytest.skip( + "The total length of the motifs is greater than the sequence length." 
+ ) + + ehrlich_1 = EhrlichBlackBox( + sequence_length=sequence_length, + motif_length=motif_length, + n_motifs=n_motifs, + seed=1, + ) + ehrlich_2 = EhrlichBlackBox( + sequence_length=sequence_length, + motif_length=motif_length, + n_motifs=n_motifs, + seed=2, + ) + + assert not np.all(ehrlich_1.motifs == ehrlich_2.motifs) + + +@pytest.mark.parametrize("sequence_length", [10, 20, 50, 100]) +@pytest.mark.parametrize("motif_length", [2, 3, 4, 5]) +@pytest.mark.parametrize("n_motifs", [1, 2, 3, 4]) +def test_ehrlich_function_produces_optimal_sequences( + sequence_length: int, motif_length: int, n_motifs: int +): + if n_motifs * motif_length > sequence_length: + pytest.skip( + "The total length of the motifs is greater than the sequence length." + ) + + ehrlich = EhrlichBlackBox( + sequence_length=sequence_length, + motif_length=motif_length, + n_motifs=n_motifs, + seed=1, + ) + + optimal_sequence = ehrlich.construct_optimal_solution() + assert ehrlich._is_feasible(optimal_sequence) + assert ehrlich(optimal_sequence) == 1.0 + + +def test_consistency_of_ehrlich_function_motif_matching(): + ehrlich = EhrlichBlackBox( + sequence_length=10, + motif_length=3, + n_motifs=2, + quantization=3, + seed=1, + ) + + one_sequence = np.array(["E"] * 10).reshape(1, 10) + motif_matches = ehrlich._maximal_motif_matches( + one_sequence, np.array(["E", "V", "D"]), np.array([0, 1, 3]) + ) + assert motif_matches == 1 + + another_sequence = "EVEEEEEEEE" + another_sequence = np.array(list(another_sequence)).reshape(1, 10) + assert ( + ehrlich._maximal_motif_matches( + another_sequence, np.array(["E", "V", "D"]), np.array([0, 1, 3]) + ) + == 2 + ) + + yet_another_sequence = "EEEEVEDEEE" + yet_another_sequence = np.array(list(yet_another_sequence)).reshape(1, 10) + assert ( + ehrlich._maximal_motif_matches( + yet_another_sequence, np.array(["E", "V", "D"]), np.array([0, 1, 3]) + ) + == 3 + ) + + +def test_creating_a_problem_with_a_factory(): + problem_factory = EhrlichProblemFactory() + + 
problem = problem_factory.create( + sequence_length=10, + motif_length=3, + n_motifs=2, + quantization=3, + seed=1, + ) + + f, x0 = problem.black_box, problem.x0 + y0 = f(x0) + + +def test_creating_with_create(): + from poli import create + + problem = create( + name="ehrlich", + sequence_length=10, + motif_length=3, + n_motifs=2, + quantization=3, + seed=1, + ) + f, x0 = problem.black_box, problem.x0 + y0 = f(x0) From 7d531bd5174a86ca479029b577314be9b74f4abd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Gonz=C3=A1lez=20Duque?= Date: Thu, 11 Jul 2024 16:00:11 +0200 Subject: [PATCH 05/10] Adds pre-commit hooks (#216) * Adds a first version of the Ehrlich function proposed by Stanton et al * Adds seeds for all functions in Ehrlich * Adds tests for motif matching, seeding and optimal sequence creation * Adds problem factory and associated tests * Adds docstrings * Adds pre-commit hooks * Adds isort with black compatiblity * Removes the examples folder from isort * Adds pre-commit hooks * Adds isort with black compatiblity * Removes the examples folder from isort --- .pre-commit-config.yaml | 16 ++ examples/minimal_working_example.py | 1 + .../example_logging_rdkit_qed_using_wandb.py | 3 +- .../adding_a_wandb_observer/wandb_observer.py | 2 +- ...example_logging_rdkit_logp_using_mlflow.py | 3 +- .../mlflow_observer.py | 1 + .../00_registering_an_observer.py | 4 +- .../02_discouraged_use.py | 5 +- ...aturation_mutagenesis_at_first_position.py | 2 +- .../querying_qed_and_logp.py | 2 +- .../registering_aloha.py | 2 +- pyproject.toml | 5 +- src/poli/__init__.py | 3 +- src/poli/benchmarks/__init__.py | 6 +- src/poli/benchmarks/guacamol.py | 9 +- src/poli/benchmarks/pmo.py | 4 +- .../toy_continuous_functions_benchmark.py | 6 +- src/poli/core/abstract_black_box.py | 7 +- src/poli/core/benchmark_information.py | 2 +- src/poli/core/black_box_information.py | 2 +- src/poli/core/chemistry/tdc_black_box.py | 1 - .../core/chemistry/tdc_isolated_function.py | 2 - 
src/poli/core/multi_objective_black_box.py | 2 +- src/poli/core/proteins/foldx_black_box.py | 7 +- .../core/proteins/foldx_isolated_function.py | 7 +- src/poli/core/registry.py | 13 +- src/poli/core/util/batch/batch_input.py | 2 +- .../core/util/chemistry/string_to_molecule.py | 3 +- src/poli/core/util/external_observer.py | 3 +- .../util/files/download_files_from_github.py | 4 +- src/poli/core/util/files/integrity.py | 3 +- .../process_wrapper.py | 5 +- src/poli/core/util/isolation/instancing.py | 10 +- .../objective_management/make_run_script.py | 17 +- src/poli/core/util/observer_wrapper.py | 2 +- src/poli/core/util/proteins/foldx.py | 11 +- src/poli/core/util/proteins/mutations.py | 1 - src/poli/core/util/proteins/rasp/__init__.py | 2 +- .../proteins/rasp/inner_rasp/PrismData.py | 17 +- .../util/proteins/rasp/inner_rasp/helpers.py | 2 +- .../pdb_parser_scripts/clean_pdb.py | 6 +- .../extract_environments.py | 1 - .../proteins/rasp/inner_rasp/run_pipeline.py | 3 +- .../proteins/rasp/inner_rasp/visualization.py | 17 +- .../core/util/proteins/rasp/load_models.py | 6 +- .../core/util/proteins/rasp/rasp_interface.py | 38 ++--- src/poli/core/util/seeding/__init__.py | 2 +- src/poli/core/util/seeding/seeding.py | 3 +- src/poli/external_isolated_function_script.py | 3 +- src/poli/external_problem_factory_script.py | 3 +- src/poli/objective_factory.py | 16 +- src/poli/objective_repository/__init__.py | 157 ++++++++---------- .../albuterol_similarity/register.py | 6 - .../objective_repository/aloha/register.py | 5 +- .../amlodipine_mpo/register.py | 10 +- .../celecoxib_rediscovery/register.py | 7 - .../objective_repository/deco_hop/register.py | 6 - .../dockstring/isolated_function.py | 8 +- .../dockstring/register.py | 10 +- .../drd2_docking/register.py | 7 +- .../drd3_docking/__init__.py | 6 +- .../drd3_docking/register.py | 8 +- .../objective_repository/ehrlich/register.py | 13 +- .../fexofenadine_mpo/register.py | 6 - .../foldx_rfp_lambo/isolated_function.py | 22 ++- 
.../foldx_rfp_lambo/register.py | 8 +- .../foldx_sasa/information.py | 2 +- .../foldx_sasa/isolated_function.py | 7 +- .../foldx_sasa/register.py | 7 +- .../foldx_stability/information.py | 2 +- .../foldx_stability/isolated_function.py | 7 +- .../foldx_stability/register.py | 13 +- .../foldx_stability_and_sasa/information.py | 2 +- .../isolated_function.py | 7 +- .../foldx_stability_and_sasa/register.py | 12 +- .../gfp_cbas/cbas_alphabet_preprocessing.py | 2 +- .../objective_repository/gfp_cbas/gfp_gp.py | 4 +- .../gfp_cbas/isolated_function.py | 3 +- .../objective_repository/gfp_cbas/register.py | 7 +- .../gfp_select/information.py | 1 - .../gfp_select/isolated_function.py | 1 + .../gfp_select/register.py | 5 +- .../gsk3_beta/register.py | 4 - .../isomer_c7h8n2o2/register.py | 6 - .../isomer_c9h10n2o2pf2cl/register.py | 6 - .../objective_repository/jnk3/register.py | 4 - .../objective_repository/median_1/register.py | 6 - .../objective_repository/median_2/register.py | 6 - .../mestranol_similarity/register.py | 6 - .../osimetrinib_mpo/register.py | 6 - .../penalized_logp_lambo/isolated_function.py | 7 +- .../penalized_logp_lambo/register.py | 6 +- .../perindopril_mpo/register.py | 6 - .../ranolazine_mpo/register.py | 6 - .../objective_repository/rasp/information.py | 1 - .../rasp/isolated_function.py | 22 ++- .../objective_repository/rasp/register.py | 12 +- .../rdkit_logp/register.py | 6 +- .../rdkit_qed/register.py | 8 +- .../information.py | 2 +- .../rfp_foldx_stability_and_sasa/register.py | 7 +- .../objective_repository/sa_tdc/register.py | 9 +- .../scaffold_hop/register.py | 6 - .../sitagliptin_mpo/register.py | 6 - .../super_mario_bros/isolated_function.py | 9 +- .../super_mario_bros/level_utils.py | 2 +- .../super_mario_bros/register.py | 7 +- .../super_mario_bros/simulator.py | 4 +- .../thiothixene_rediscovery/register.py | 6 - .../toy_continuous_problem/register.py | 12 +- .../toy_continuous_problem.py | 18 +- .../troglitazone_rediscovery/register.py | 6 
- .../valsartan_smarts/register.py | 10 +- .../white_noise/register.py | 6 +- .../zaleplon_mpo/register.py | 6 - .../docs_examples/test_objective_functions.py | 29 ++-- src/poli/tests/observers/test_observers.py | 7 +- .../parallelization/test_parallelization.py | 5 +- .../test_budget_exhaustion.py | 3 +- .../chemistry/test_chemistry_objectives.py | 32 ++-- .../tests/registry/proteins/test_foldx.py | 2 +- .../registry/proteins/test_foldx_rfp_lambo.py | 3 +- src/poli/tests/registry/proteins/test_rasp.py | 2 +- .../registry/test_black_box_instancing.py | 8 +- .../test_multi_objective_and_negative.py | 2 +- .../registry/test_passing_array_of_strings.py | 1 + ...t_instancing_of_toy_continuous_problems.py | 5 +- .../toy_discrete_problems/test_ehrlich.py | 1 - .../test_black_box_instancing.py | 8 +- src/poli/tests/test_core_promises.py | 2 +- .../tests/test_minimal_working_example.py | 2 + src/poli/tests/util/test_foldx_interface.py | 12 +- src/poli/tests/util/test_protein_utilities.py | 5 +- 133 files changed, 385 insertions(+), 636 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..3a292f1d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,16 @@ +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: trailing-whitespace + exclude: '.*\.pdb$' + - id: check-yaml + - id: check-added-large-files +- repo: https://github.com/psf/black + rev: 24.1.1 + hooks: + - id: black +- repo: https://github.com/PyCQA/isort + rev: 5.13.2 + hooks: + - id: isort diff --git a/examples/minimal_working_example.py b/examples/minimal_working_example.py index 25db9401..205df759 100644 --- a/examples/minimal_working_example.py +++ b/examples/minimal_working_example.py @@ -6,6 +6,7 @@ """ import numpy as np + from poli import objective_factory problem = objective_factory.create(name="white_noise") diff --git 
a/examples/observers/adding_a_wandb_observer/example_logging_rdkit_qed_using_wandb.py b/examples/observers/adding_a_wandb_observer/example_logging_rdkit_qed_using_wandb.py index 67885edd..9335e4aa 100644 --- a/examples/observers/adding_a_wandb_observer/example_logging_rdkit_qed_using_wandb.py +++ b/examples/observers/adding_a_wandb_observer/example_logging_rdkit_qed_using_wandb.py @@ -9,12 +9,11 @@ from pathlib import Path import numpy as np +from wandb_observer import WandbObserver from poli.core.problem import Problem from poli.objective_repository import QEDProblemFactory -from wandb_observer import WandbObserver - THIS_DIR = Path(__file__).parent.resolve() if __name__ == "__main__": diff --git a/examples/observers/adding_a_wandb_observer/wandb_observer.py b/examples/observers/adding_a_wandb_observer/wandb_observer.py index 65f6b0e0..fca3d1cb 100644 --- a/examples/observers/adding_a_wandb_observer/wandb_observer.py +++ b/examples/observers/adding_a_wandb_observer/wandb_observer.py @@ -6,8 +6,8 @@ """ import numpy as np -import wandb +import wandb from poli.core.black_box_information import BlackBoxInformation from poli.core.util.abstract_observer import AbstractObserver diff --git a/examples/observers/adding_an_mlflow_observer/example_logging_rdkit_logp_using_mlflow.py b/examples/observers/adding_an_mlflow_observer/example_logging_rdkit_logp_using_mlflow.py index da692392..6f7d3d38 100644 --- a/examples/observers/adding_an_mlflow_observer/example_logging_rdkit_logp_using_mlflow.py +++ b/examples/observers/adding_an_mlflow_observer/example_logging_rdkit_logp_using_mlflow.py @@ -15,11 +15,10 @@ from pathlib import Path import numpy as np +from mlflow_observer import MlFlowObserver from poli import objective_factory -from mlflow_observer import MlFlowObserver - THIS_DIR = Path(__file__).parent.resolve() if __name__ == "__main__": diff --git a/examples/observers/adding_an_mlflow_observer/mlflow_observer.py 
b/examples/observers/adding_an_mlflow_observer/mlflow_observer.py index a9d531c8..3a8e6248 100644 --- a/examples/observers/adding_an_mlflow_observer/mlflow_observer.py +++ b/examples/observers/adding_an_mlflow_observer/mlflow_observer.py @@ -11,6 +11,7 @@ """ from pathlib import Path + import mlflow import numpy as np diff --git a/examples/observers/registering_an_observer/00_registering_an_observer.py b/examples/observers/registering_an_observer/00_registering_an_observer.py index 022c6720..65059204 100644 --- a/examples/observers/registering_an_observer/00_registering_an_observer.py +++ b/examples/observers/registering_an_observer/00_registering_an_observer.py @@ -15,10 +15,10 @@ of how to instantiate it after registration. """ -from poli.core.registry import register_observer - from print_observer import SimplePrintObserver +from poli.core.registry import register_observer + if __name__ == "__main__": register_observer( observer=SimplePrintObserver(), diff --git a/examples/observers/registering_an_observer/02_discouraged_use.py b/examples/observers/registering_an_observer/02_discouraged_use.py index 83f6888d..ed2af519 100644 --- a/examples/observers/registering_an_observer/02_discouraged_use.py +++ b/examples/observers/registering_an_observer/02_discouraged_use.py @@ -6,11 +6,10 @@ """ import numpy as np - -from poli.core.registry import DEFAULT_OBSERVER_NAME from print_observer import SimplePrintObserver -from poli import objective_factory +from poli import objective_factory +from poli.core.registry import DEFAULT_OBSERVER_NAME if __name__ == "__main__": # Instantiate the objective diff --git a/examples/protein_stability_and_sasa/saturation_mutagenesis_for_two_proteins_using_rasp/saturation_mutagenesis_at_first_position.py b/examples/protein_stability_and_sasa/saturation_mutagenesis_for_two_proteins_using_rasp/saturation_mutagenesis_at_first_position.py index 0cb875f9..8e22cbce 100644 --- 
a/examples/protein_stability_and_sasa/saturation_mutagenesis_for_two_proteins_using_rasp/saturation_mutagenesis_at_first_position.py +++ b/examples/protein_stability_and_sasa/saturation_mutagenesis_for_two_proteins_using_rasp/saturation_mutagenesis_at_first_position.py @@ -13,8 +13,8 @@ import numpy as np import pandas as pd -from poli.core.util.proteins.defaults import AMINO_ACIDS from poli import objective_factory +from poli.core.util.proteins.defaults import AMINO_ACIDS THIS_DIR = Path(__file__).parent.resolve() diff --git a/examples/small_molecules/querying_qed_and_logp_of_selfies/querying_qed_and_logp.py b/examples/small_molecules/querying_qed_and_logp_of_selfies/querying_qed_and_logp.py index 10accdc8..af12e519 100644 --- a/examples/small_molecules/querying_qed_and_logp_of_selfies/querying_qed_and_logp.py +++ b/examples/small_molecules/querying_qed_and_logp_of_selfies/querying_qed_and_logp.py @@ -13,7 +13,7 @@ import numpy as np -from poli.objective_repository import QEDBlackBox, LogPBlackBox +from poli.objective_repository import LogPBlackBox, QEDBlackBox THIS_DIR = Path(__file__).parent.resolve() diff --git a/examples/the_basics/a_simple_objective_function_registration/registering_aloha.py b/examples/the_basics/a_simple_objective_function_registration/registering_aloha.py index f65472c5..eb2675d3 100644 --- a/examples/the_basics/a_simple_objective_function_registration/registering_aloha.py +++ b/examples/the_basics/a_simple_objective_function_registration/registering_aloha.py @@ -5,8 +5,8 @@ the initial point, and its first evaluation. 
""" -from typing import Tuple from string import ascii_uppercase +from typing import Tuple import numpy as np diff --git a/pyproject.toml b/pyproject.toml index f479db96..46b1fd4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,4 +37,7 @@ Homepage = "https://github.com/MachineLearningLifeScience/poli" [tool.pytest.ini_options] markers = [ "slow: marks tests as slow (deselect with '-m \"not slow\"')", -] \ No newline at end of file +] + +[tool.isort] +profile = "black" diff --git a/src/poli/__init__.py b/src/poli/__init__.py index d4a8de49..edca0642 100644 --- a/src/poli/__init__.py +++ b/src/poli/__init__.py @@ -1,7 +1,8 @@ """poli, a library for discrete black-box objective functions.""" __author__ = "Simon Bartels & Miguel González-Duque (MLLS)" +from .core.util.isolation.instancing import instance_function_as_isolated_process + # from .core import get_problems from .objective_factory import create -from .core.util.isolation.instancing import instance_function_as_isolated_process from .objective_repository import get_problems diff --git a/src/poli/benchmarks/__init__.py b/src/poli/benchmarks/__init__.py index 98a8605e..561558af 100644 --- a/src/poli/benchmarks/__init__.py +++ b/src/poli/benchmarks/__init__.py @@ -1,7 +1,7 @@ +from .guacamol import GuacaMolGoalDirectedBenchmark +from .pmo import PMOBenchmark from .toy_continuous_functions_benchmark import ( - ToyContinuousFunctionsBenchmark, EmbeddedBranin2D, EmbeddedHartmann6D, + ToyContinuousFunctionsBenchmark, ) -from .guacamol import GuacaMolGoalDirectedBenchmark -from .pmo import PMOBenchmark diff --git a/src/poli/benchmarks/guacamol.py b/src/poli/benchmarks/guacamol.py index b6317b91..0dd45f03 100644 --- a/src/poli/benchmarks/guacamol.py +++ b/src/poli/benchmarks/guacamol.py @@ -20,14 +20,13 @@ [2] Huang, Kexin, Tianfan Fu, Wenhao Gao, Yue Zhao, Yusuf Roohani, Jure Leskovec, Connor W Coley, Cao Xiao, Jimeng Sun, and Marinka Zitnik. 
“Therapeutics Data Commons: Machine Learning Datasets and Tasks for Drug Discovery and Development.” - Proceedings of Neural Information Processing Systems, NeurIPS Datasets and Benchmarks, 2021. + Proceedings of Neural Information Processing Systems, NeurIPS Datasets and Benchmarks, 2021. """ from typing import Literal, Union -from poli.core.problem import Problem from poli.core.abstract_benchmark import AbstractBenchmark - +from poli.core.problem import Problem from poli.objective_repository import ( AlbuterolSimilarityProblemFactory, AmlodipineMPOProblemFactory, @@ -36,14 +35,14 @@ FexofenadineMPOProblemFactory, IsomerC7H8N2O2ProblemFactory, IsomerC9H10N2O2PF2ClProblemFactory, + LogPProblemFactory, Median1ProblemFactory, Median2ProblemFactory, MestranolSimilarityProblemFactory, OsimetrinibMPOProblemFactory, PerindoprilMPOProblemFactory, - RanolazineMPOProblemFactory, - LogPProblemFactory, QEDProblemFactory, + RanolazineMPOProblemFactory, SAProblemFactory, ScaffoldHopProblemFactory, SitagliptinMPOProblemFactory, diff --git a/src/poli/benchmarks/pmo.py b/src/poli/benchmarks/pmo.py index 65870701..6652141a 100644 --- a/src/poli/benchmarks/pmo.py +++ b/src/poli/benchmarks/pmo.py @@ -23,7 +23,7 @@ [3] Huang, Kexin, Tianfan Fu, Wenhao Gao, Yue Zhao, Yusuf Roohani, Jure Leskovec, Connor W Coley, Cao Xiao, Jimeng Sun, and Marinka Zitnik. “Therapeutics Data Commons: Machine Learning Datasets and Tasks for Drug Discovery and Development.” - Proceedings of Neural Information Processing Systems, NeurIPS Datasets and Benchmarks, 2021. + Proceedings of Neural Information Processing Systems, NeurIPS Datasets and Benchmarks, 2021. 
""" @@ -31,8 +31,8 @@ from poli.objective_repository import ( DRD2ProblemFactory, - JNK3ProblemFactory, GSK3BetaProblemFactory, + JNK3ProblemFactory, ) from .guacamol import GuacaMolGoalDirectedBenchmark diff --git a/src/poli/benchmarks/toy_continuous_functions_benchmark.py b/src/poli/benchmarks/toy_continuous_functions_benchmark.py index 78435a43..6cf2fd78 100644 --- a/src/poli/benchmarks/toy_continuous_functions_benchmark.py +++ b/src/poli/benchmarks/toy_continuous_functions_benchmark.py @@ -15,15 +15,15 @@ https://www.sfu.ca/~ssurjano/optimization.html. """ -from typing import Union, List +from typing import List, Union + from poli.core.abstract_benchmark import AbstractBenchmark from poli.core.problem import Problem - from poli.objective_repository import ToyContinuousProblemFactory from poli.objective_repository.toy_continuous_problem.toy_continuous_problem import ( POSSIBLE_FUNCTIONS, - TWO_DIMENSIONAL_PROBLEMS, SIX_DIMENSIONAL_PROBLEMS, + TWO_DIMENSIONAL_PROBLEMS, ) diff --git a/src/poli/core/abstract_black_box.py b/src/poli/core/abstract_black_box.py index 533c7e79..a3094d26 100644 --- a/src/poli/core/abstract_black_box.py +++ b/src/poli/core/abstract_black_box.py @@ -2,18 +2,17 @@ all objective functions should inherit. 
""" +from multiprocessing import Pool, cpu_count from warnings import warn import numpy as np -from multiprocessing import Pool, cpu_count from poli.core.black_box_information import BlackBoxInformation +from poli.core.exceptions import BudgetExhaustedException from poli.core.problem_setup_information import ProblemSetupInformation - from poli.core.util.abstract_observer import AbstractObserver -from poli.core.util.batch import batched from poli.core.util.alignment import is_aligned_input -from poli.core.exceptions import BudgetExhaustedException +from poli.core.util.batch import batched class AbstractBlackBox: diff --git a/src/poli/core/benchmark_information.py b/src/poli/core/benchmark_information.py index be35b3b3..98a88485 100644 --- a/src/poli/core/benchmark_information.py +++ b/src/poli/core/benchmark_information.py @@ -1,4 +1,4 @@ -from typing import Union, Literal +from typing import Literal, Union class BenchmarkInformation: diff --git a/src/poli/core/black_box_information.py b/src/poli/core/black_box_information.py index 78d2c5a8..325a56e8 100644 --- a/src/poli/core/black_box_information.py +++ b/src/poli/core/black_box_information.py @@ -12,7 +12,7 @@ - The alphabet of allowed characters. 
""" -from typing import Union, Literal +from typing import Literal, Union class BlackBoxInformation: diff --git a/src/poli/core/chemistry/tdc_black_box.py b/src/poli/core/chemistry/tdc_black_box.py index 4d922d78..a96856c3 100644 --- a/src/poli/core/chemistry/tdc_black_box.py +++ b/src/poli/core/chemistry/tdc_black_box.py @@ -21,7 +21,6 @@ import numpy as np from poli.core.abstract_black_box import AbstractBlackBox - from poli.core.util.isolation.instancing import get_inner_function diff --git a/src/poli/core/chemistry/tdc_isolated_function.py b/src/poli/core/chemistry/tdc_isolated_function.py index afc3ca9d..6c4ca65c 100644 --- a/src/poli/core/chemistry/tdc_isolated_function.py +++ b/src/poli/core/chemistry/tdc_isolated_function.py @@ -12,11 +12,9 @@ """ import numpy as np - from tdc import Oracle from poli.core.abstract_isolated_function import AbstractIsolatedFunction - from poli.core.util.chemistry.string_to_molecule import translate_selfies_to_smiles diff --git a/src/poli/core/multi_objective_black_box.py b/src/poli/core/multi_objective_black_box.py index bf6df1ed..4630004e 100644 --- a/src/poli/core/multi_objective_black_box.py +++ b/src/poli/core/multi_objective_black_box.py @@ -9,8 +9,8 @@ import numpy as np from poli.core.abstract_black_box import AbstractBlackBox -from poli.core.problem_setup_information import ProblemSetupInformation from poli.core.black_box_information import BlackBoxInformation +from poli.core.problem_setup_information import ProblemSetupInformation class MultiObjectiveBlackBox(AbstractBlackBox): diff --git a/src/poli/core/proteins/foldx_black_box.py b/src/poli/core/proteins/foldx_black_box.py index 4db32443..100496d8 100644 --- a/src/poli/core/proteins/foldx_black_box.py +++ b/src/poli/core/proteins/foldx_black_box.py @@ -11,20 +11,19 @@ 33(suppl_2), W382-W388. 
""" -from typing import Union, List +from multiprocessing import cpu_count from pathlib import Path from time import time +from typing import List, Union from uuid import uuid4 -from multiprocessing import cpu_count from poli.core.abstract_black_box import AbstractBlackBox from poli.core.problem_setup_information import ProblemSetupInformation - +from poli.core.util.proteins.foldx import FoldxInterface from poli.core.util.proteins.pdb_parsing import ( parse_pdb_as_residue_strings, parse_pdb_as_residues, ) -from poli.core.util.proteins.foldx import FoldxInterface # This is the folder where all the files # generated by FoldX will be stored. diff --git a/src/poli/core/proteins/foldx_isolated_function.py b/src/poli/core/proteins/foldx_isolated_function.py index f2ffe51a..6fcb0991 100644 --- a/src/poli/core/proteins/foldx_isolated_function.py +++ b/src/poli/core/proteins/foldx_isolated_function.py @@ -1,19 +1,18 @@ -from typing import Union, List +from multiprocessing import cpu_count from pathlib import Path from time import time +from typing import List, Union from uuid import uuid4 -from multiprocessing import cpu_count import numpy as np from poli.core.abstract_isolated_function import AbstractIsolatedFunction from poli.core.problem_setup_information import ProblemSetupInformation - +from poli.core.util.proteins.foldx import FoldxInterface from poli.core.util.proteins.pdb_parsing import ( parse_pdb_as_residue_strings, parse_pdb_as_residues, ) -from poli.core.util.proteins.foldx import FoldxInterface # This is the folder where all the files # generated by FoldX will be stored. diff --git a/src/poli/core/registry.py b/src/poli/core/registry.py index 18426b66..91948a56 100644 --- a/src/poli/core/registry.py +++ b/src/poli/core/registry.py @@ -1,21 +1,20 @@ """This module contains utilities for registering problems and observers. 
""" -from typing import List, Union, Dict, Type import configparser -from pathlib import Path -import warnings import subprocess +import warnings +from pathlib import Path +from typing import Dict, List, Type, Union -from poli.core.abstract_isolated_function import AbstractIsolatedFunction from poli.core.abstract_black_box import AbstractBlackBox +from poli.core.abstract_isolated_function import AbstractIsolatedFunction from poli.core.abstract_problem_factory import AbstractProblemFactory - from poli.core.util.abstract_observer import AbstractObserver from poli.core.util.objective_management.make_run_script import ( - make_run_script, - make_observer_script, make_isolated_function_script, + make_observer_script, + make_run_script, ) # from poli.objective_repository import AVAILABLE_PROBLEM_FACTORIES, AVAILABLE_OBJECTIVES diff --git a/src/poli/core/util/batch/batch_input.py b/src/poli/core/util/batch/batch_input.py index c19aa1c9..ceb7ad7d 100644 --- a/src/poli/core/util/batch/batch_input.py +++ b/src/poli/core/util/batch/batch_input.py @@ -1,7 +1,7 @@ """This module implements a batched input generator.""" -from typing import Iterable from itertools import islice +from typing import Iterable def batched(iterable: Iterable, chunk_size: int): diff --git a/src/poli/core/util/chemistry/string_to_molecule.py b/src/poli/core/util/chemistry/string_to_molecule.py index 9e69973e..fcd914ae 100644 --- a/src/poli/core/util/chemistry/string_to_molecule.py +++ b/src/poli/core/util/chemistry/string_to_molecule.py @@ -4,9 +4,8 @@ from typing import List -from rdkit import Chem - import selfies as sf +from rdkit import Chem def translate_smiles_to_selfies( diff --git a/src/poli/core/util/external_observer.py b/src/poli/core/util/external_observer.py index 76525674..135be019 100644 --- a/src/poli/core/util/external_observer.py +++ b/src/poli/core/util/external_observer.py @@ -1,12 +1,13 @@ """External observer, which can be run in an isolated process.""" from typing import Any + 
import numpy as np from poli.core.black_box_information import BlackBoxInformation +from poli.core.registry import _DEFAULT, _OBSERVER, config from poli.core.util.abstract_observer import AbstractObserver from poli.core.util.inter_process_communication.process_wrapper import ProcessWrapper -from poli.core.registry import config, _DEFAULT, _OBSERVER class ExternalObserver(AbstractObserver): diff --git a/src/poli/core/util/files/download_files_from_github.py b/src/poli/core/util/files/download_files_from_github.py index 6da247f0..7a9ab100 100644 --- a/src/poli/core/util/files/download_files_from_github.py +++ b/src/poli/core/util/files/download_files_from_github.py @@ -1,7 +1,7 @@ """Utilities for downloading files from GitHub repositories. This module requires the PyGithub package, install it with: - + pip install PyGithub Most of this code was taken and adapted from: @@ -12,7 +12,7 @@ import os from pathlib import Path -from github import Github, GithubException, BadCredentialsException +from github import BadCredentialsException, Github, GithubException from github.ContentFile import ContentFile from github.Repository import Repository diff --git a/src/poli/core/util/files/integrity.py b/src/poli/core/util/files/integrity.py index 4ad0d66f..2ccf91bc 100644 --- a/src/poli/core/util/files/integrity.py +++ b/src/poli/core/util/files/integrity.py @@ -6,9 +6,8 @@ code is being executed. """ -from pathlib import Path - import hashlib +from pathlib import Path def compute_md5_from_filepath(filepath: Path, read_mode: str = "rb") -> str: diff --git a/src/poli/core/util/inter_process_communication/process_wrapper.py b/src/poli/core/util/inter_process_communication/process_wrapper.py index 0b50f70b..fbc4777a 100644 --- a/src/poli/core/util/inter_process_communication/process_wrapper.py +++ b/src/poli/core/util/inter_process_communication/process_wrapper.py @@ -2,12 +2,11 @@ Module that wraps utility functions for interprocess communication. 
""" -from pathlib import Path import logging import subprocess import time -from multiprocessing.connection import Listener, Client - +from multiprocessing.connection import Client, Listener +from pathlib import Path from uuid import uuid4 diff --git a/src/poli/core/util/isolation/instancing.py b/src/poli/core/util/isolation/instancing.py index e5d32921..1d5d16b2 100644 --- a/src/poli/core/util/isolation/instancing.py +++ b/src/poli/core/util/isolation/instancing.py @@ -1,16 +1,12 @@ from __future__ import annotations -from pathlib import Path import configparser -import subprocess import importlib - import logging -from poli.core.registry import ( - _OBSERVER, - _ISOLATED_FUNCTION_SCRIPT_LOCATION, -) +import subprocess +from pathlib import Path +from poli.core.registry import _ISOLATED_FUNCTION_SCRIPT_LOCATION, _OBSERVER from poli.core.util.inter_process_communication.process_wrapper import ProcessWrapper from .external_function import ExternalFunction diff --git a/src/poli/core/util/objective_management/make_run_script.py b/src/poli/core/util/objective_management/make_run_script.py index 4fd11590..ad380f85 100644 --- a/src/poli/core/util/objective_management/make_run_script.py +++ b/src/poli/core/util/objective_management/make_run_script.py @@ -1,22 +1,21 @@ """This module contains utilities for creating run scripts for problems and observers. 
""" -from typing import List, Union, Type -from pathlib import Path +import inspect import os +import stat import sys from os.path import basename, dirname, join -import inspect -import stat +from pathlib import Path +from typing import List, Type, Union -from poli import external_problem_factory_script -from poli import external_isolated_function_script -from poli.external_problem_factory_script import ADDITIONAL_IMPORT_SEARCH_PATHES_KEY -from poli.core.util import observer_wrapper -from poli.core.abstract_problem_factory import AbstractProblemFactory +from poli import external_isolated_function_script, external_problem_factory_script from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_isolated_function import AbstractIsolatedFunction +from poli.core.abstract_problem_factory import AbstractProblemFactory +from poli.core.util import observer_wrapper from poli.core.util.abstract_observer import AbstractObserver +from poli.external_problem_factory_script import ADDITIONAL_IMPORT_SEARCH_PATHES_KEY # By default, we will store the run scripts inside the # home folder of the user, on the hidden folder diff --git a/src/poli/core/util/observer_wrapper.py b/src/poli/core/util/observer_wrapper.py index 01a644e3..399a4488 100644 --- a/src/poli/core/util/observer_wrapper.py +++ b/src/poli/core/util/observer_wrapper.py @@ -1,8 +1,8 @@ """Script that gets called by the mother process to start an external observer process. 
""" -import sys import argparse +import sys import traceback from poli.core.util.abstract_observer import AbstractObserver diff --git a/src/poli/core/util/proteins/foldx.py b/src/poli/core/util/proteins/foldx.py index 4fdad957..d7f80676 100644 --- a/src/poli/core/util/proteins/foldx.py +++ b/src/poli/core/util/proteins/foldx.py @@ -24,17 +24,16 @@ """ -from typing import List, Union -from pathlib import Path +import logging +import os import shutil import subprocess -import os -import logging +from pathlib import Path +from typing import List, Union -from Bio.PDB.Residue import Residue from Bio.PDB import SASA +from Bio.PDB.Residue import Residue from Bio.SeqUtils import seq1 - from pdbtools.pdb_delhetatm import run as pdb_delhetatm_run from poli.core.util.proteins.mutations import ( diff --git a/src/poli/core/util/proteins/mutations.py b/src/poli/core/util/proteins/mutations.py index fbc98fa1..332be5ce 100644 --- a/src/poli/core/util/proteins/mutations.py +++ b/src/poli/core/util/proteins/mutations.py @@ -17,7 +17,6 @@ from typing import List, Tuple, Union import numpy as np - from Bio.PDB.Residue import Residue from Bio.SeqUtils import seq1 diff --git a/src/poli/core/util/proteins/rasp/__init__.py b/src/poli/core/util/proteins/rasp/__init__.py index c032cd33..9dfd941a 100644 --- a/src/poli/core/util/proteins/rasp/__init__.py +++ b/src/poli/core/util/proteins/rasp/__init__.py @@ -1,4 +1,4 @@ """Utilities for interacting with the original RaSP codebase.""" -from .rasp_interface import RaspInterface from .load_models import load_cavity_and_downstream_models +from .rasp_interface import RaspInterface diff --git a/src/poli/core/util/proteins/rasp/inner_rasp/PrismData.py b/src/poli/core/util/proteins/rasp/inner_rasp/PrismData.py index 9ca766ef..2943a94b 100644 --- a/src/poli/core/util/proteins/rasp/inner_rasp/PrismData.py +++ b/src/poli/core/util/proteins/rasp/inner_rasp/PrismData.py @@ -4,10 +4,10 @@ """Module for data handling in the PRISM project -This module 
implments classes for parsing (PrismParser) and handling +This module implments classes for parsing (PrismParser) and handling (PrismData derived classes) of data files. -In general, calling PrismParser.read(filename) will return a data object of +In general, calling PrismParser.read(filename) will return a data object of the same derived class, e.g. a VariantData object. See documentation of derived parser and data classes for help. @@ -16,11 +16,15 @@ __version__ = 1.001 -from Bio import Seq, SeqRecord, SeqIO, pairwise2, SubsMat -from Bio.SubsMat import MatrixInfo +import copy +import csv +import time + import numpy as np import pandas as pd -import yaml, csv, copy, time +import yaml +from Bio import Seq, SeqIO, SeqRecord, SubsMat, pairwise2 +from Bio.SubsMat import MatrixInfo class PrismFormatError(Exception): @@ -1675,7 +1679,8 @@ def merge( if __name__ == "__main__": # Parse commandline arguments - import argparse, sys + import argparse + import sys arg_parser = argparse.ArgumentParser( description="PRISM data file processing and alignment" diff --git a/src/poli/core/util/proteins/rasp/inner_rasp/helpers.py b/src/poli/core/util/proteins/rasp/inner_rasp/helpers.py index 01bb7e73..a8608df0 100644 --- a/src/poli/core/util/proteins/rasp/inner_rasp/helpers.py +++ b/src/poli/core/util/proteins/rasp/inner_rasp/helpers.py @@ -5,8 +5,8 @@ import pickle import random import sys -from typing import Dict, List, Union from pathlib import Path +from typing import Dict, List, Union import numpy as np import pandas as pd diff --git a/src/poli/core/util/proteins/rasp/inner_rasp/pdb_parser_scripts/clean_pdb.py b/src/poli/core/util/proteins/rasp/inner_rasp/pdb_parser_scripts/clean_pdb.py index a9a4e0d0..125aeb59 100644 --- a/src/poli/core/util/proteins/rasp/inner_rasp/pdb_parser_scripts/clean_pdb.py +++ b/src/poli/core/util/proteins/rasp/inner_rasp/pdb_parser_scripts/clean_pdb.py @@ -3,17 +3,15 @@ import subprocess import sys import tempfile -from io import BytesIO, StringIO 
import time +from io import BytesIO, StringIO from pathlib import Path import Bio.PDB import Bio.PDB.Polypeptide import Bio.SeqIO -import pdbfixer - import openmm.app - +import pdbfixer PDBIO = Bio.PDB.PDBIO() PDB_PARSER = Bio.PDB.PDBParser(PERMISSIVE=0) diff --git a/src/poli/core/util/proteins/rasp/inner_rasp/pdb_parser_scripts/extract_environments.py b/src/poli/core/util/proteins/rasp/inner_rasp/pdb_parser_scripts/extract_environments.py index dd4dd84c..7ec2cfff 100644 --- a/src/poli/core/util/proteins/rasp/inner_rasp/pdb_parser_scripts/extract_environments.py +++ b/src/poli/core/util/proteins/rasp/inner_rasp/pdb_parser_scripts/extract_environments.py @@ -7,7 +7,6 @@ import Bio import Bio.PDB import numpy as np - import openmm.app import simtk.unit diff --git a/src/poli/core/util/proteins/rasp/inner_rasp/run_pipeline.py b/src/poli/core/util/proteins/rasp/inner_rasp/run_pipeline.py index 4a1463b3..1ed54134 100644 --- a/src/poli/core/util/proteins/rasp/inner_rasp/run_pipeline.py +++ b/src/poli/core/util/proteins/rasp/inner_rasp/run_pipeline.py @@ -9,8 +9,6 @@ import pandas as pd import torch from Bio.PDB.Polypeptide import index_to_one -from torch.utils.data import DataLoader, Dataset - from cavity_model import ( CavityModel, DownstreamModel, @@ -31,6 +29,7 @@ train_val_split_cavity, train_val_split_ds, ) +from torch.utils.data import DataLoader, Dataset from visualization import ( hist_plot_all, homology_plot, diff --git a/src/poli/core/util/proteins/rasp/inner_rasp/visualization.py b/src/poli/core/util/proteins/rasp/inner_rasp/visualization.py index a4e8a756..6478dd3c 100644 --- a/src/poli/core/util/proteins/rasp/inner_rasp/visualization.py +++ b/src/poli/core/util/proteins/rasp/inner_rasp/visualization.py @@ -1,25 +1,24 @@ -import os - -import Bio.PDB.Polypeptide -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -from scipy import stats -from sklearn.metrics import mean_absolute_error - # plt.rcParams["figure.dpi"] = 300 # 
plt.rcParams["figure.figsize"] = [8.0, 8.0] # plt.rcParams.update({"font.size": 14}) import glob import math +import os import pickle +import Bio.PDB.Polypeptide +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + # import mpl_scatter_density import ptitprince as pt import seaborn as sns from matplotlib.colors import LinearSegmentedColormap from matplotlib.offsetbox import AnchoredText from matplotlib.ticker import FormatStrFormatter +from scipy import stats +from sklearn.metrics import mean_absolute_error white_viridis = LinearSegmentedColormap.from_list( "white_viridis", diff --git a/src/poli/core/util/proteins/rasp/load_models.py b/src/poli/core/util/proteins/rasp/load_models.py index 26729536..66c7df35 100644 --- a/src/poli/core/util/proteins/rasp/load_models.py +++ b/src/poli/core/util/proteins/rasp/load_models.py @@ -5,13 +5,9 @@ import torch -from .inner_rasp.cavity_model import ( - CavityModel, - DownstreamModel, -) +from .inner_rasp.cavity_model import CavityModel, DownstreamModel from .inner_rasp.helpers import init_lin_weights - THIS_DIR = Path(__file__).parent.resolve() HOME_DIR = THIS_DIR.home() RASP_DIR = HOME_DIR / ".poli_objectives" / "rasp" diff --git a/src/poli/core/util/proteins/rasp/rasp_interface.py b/src/poli/core/util/proteins/rasp/rasp_interface.py index e3e95ff4..1dad5e09 100644 --- a/src/poli/core/util/proteins/rasp/rasp_interface.py +++ b/src/poli/core/util/proteins/rasp/rasp_interface.py @@ -1,7 +1,7 @@ """An interface to the original RaSP codebase. This module takes and adapts RaSP's original implementation -(which can be found at [1]), and writes an interface that +(which can be found at [1]), and writes an interface that handles the preprocessing and inference steps. RaSP, like foldx [2], predicts the effect of mutations on protein @@ -37,45 +37,33 @@ 33(suppl_2), W382-W388. 
""" -from typing import List -from pathlib import Path -import os, stat -import subprocess import logging +import os +import stat +import subprocess import traceback +from pathlib import Path +from typing import List -import pandas as pd import numpy as np - +import pandas as pd from Bio.PDB.Polypeptide import index_to_one, one_to_index - -from pdbtools.pdb_selchain import run as pdb_selchain_run from pdbtools.pdb_delhetatm import run as pdb_delhetatm_run from pdbtools.pdb_delres import run as pdb_delres_run from pdbtools.pdb_fixinsert import run as pdb_fixinsert_run +from pdbtools.pdb_selchain import run as pdb_selchain_run from pdbtools.pdb_tidy import run as pdb_tidy_run -from .inner_rasp.cavity_model import ( - ResidueEnvironmentsDataset, -) - -from .inner_rasp.helpers import ( - ds_pred, -) -from .inner_rasp.pdb_parser_scripts.clean_pdb import ( - clean_pdb, -) -from .inner_rasp.pdb_parser_scripts.extract_environments import ( - extract_environments, -) - - -from poli.core.util.proteins.mutations import edits_between_strings from poli.core.util.files.download_files_from_github import ( download_file_from_github_repository, ) from poli.core.util.files.integrity import compute_md5_from_filepath +from poli.core.util.proteins.mutations import edits_between_strings +from .inner_rasp.cavity_model import ResidueEnvironmentsDataset +from .inner_rasp.helpers import ds_pred +from .inner_rasp.pdb_parser_scripts.clean_pdb import clean_pdb +from .inner_rasp.pdb_parser_scripts.extract_environments import extract_environments THIS_DIR = Path(__file__).parent.resolve() HOME_DIR = THIS_DIR.home() diff --git a/src/poli/core/util/seeding/__init__.py b/src/poli/core/util/seeding/__init__.py index a10af6ef..e15ab2c8 100644 --- a/src/poli/core/util/seeding/__init__.py +++ b/src/poli/core/util/seeding/__init__.py @@ -1,3 +1,3 @@ """Utilities for seeding random number generators.""" -from .seeding import seed_numpy, seed_python, seed_torch, seed_python_numpy_and_torch +from .seeding 
import seed_numpy, seed_python, seed_python_numpy_and_torch, seed_torch diff --git a/src/poli/core/util/seeding/seeding.py b/src/poli/core/util/seeding/seeding.py index 8aff08b4..987a43b4 100644 --- a/src/poli/core/util/seeding/seeding.py +++ b/src/poli/core/util/seeding/seeding.py @@ -1,8 +1,9 @@ """Utilities for seeding random number generators.""" -import numpy as np import random +import numpy as np + def seed_numpy(seed: int = None) -> None: """ diff --git a/src/poli/external_isolated_function_script.py b/src/poli/external_isolated_function_script.py index 155e13ab..4c2aed18 100644 --- a/src/poli/external_isolated_function_script.py +++ b/src/poli/external_isolated_function_script.py @@ -3,17 +3,16 @@ The equivalent of objective, but for isolated black boxes instead of problem factories. """ +import argparse import logging import os import sys -import argparse import traceback from poli.core.abstract_isolated_function import AbstractIsolatedFunction from poli.core.util.inter_process_communication.process_wrapper import get_connection from poli.core.util.seeding import seed_python_numpy_and_torch - ADDITIONAL_IMPORT_SEARCH_PATHES_KEY = "ADDITIONAL_IMPORT_PATHS" diff --git a/src/poli/external_problem_factory_script.py b/src/poli/external_problem_factory_script.py index 28c693db..25a69437 100644 --- a/src/poli/external_problem_factory_script.py +++ b/src/poli/external_problem_factory_script.py @@ -1,15 +1,14 @@ """Executable script used for isolation of objective factories and functions.""" +import argparse import logging import os import sys -import argparse import traceback from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.util.inter_process_communication.process_wrapper import get_connection - ADDITIONAL_IMPORT_SEARCH_PATHES_KEY = "ADDITIONAL_IMPORT_PATHS" diff --git a/src/poli/objective_factory.py b/src/poli/objective_factory.py index 03681a40..fba11cce 100644 --- a/src/poli/objective_factory.py +++ 
b/src/poli/objective_factory.py @@ -2,22 +2,24 @@ Creates objective functions by providing a common interface to all factories in the repository. """ -from typing import Tuple, Any -import numpy as np -from pathlib import Path import configparser import logging +from pathlib import Path +from typing import Any, Tuple + +import numpy as np from poli.core import registry from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_problem_factory import AbstractProblemFactory +from poli.core.problem import Problem from poli.core.registry import ( - _RUN_SCRIPT_LOCATION, - _OBSERVER, _DEFAULT, - register_problem_from_repository, _DEFAULT_OBSERVER_RUN_SCRIPT, + _OBSERVER, + _RUN_SCRIPT_LOCATION, DEFAULT_OBSERVER_NAME, + register_problem_from_repository, ) from poli.core.util.abstract_observer import AbstractObserver from poli.core.util.algorithm_observer_wrapper import AlgorithmObserverWrapper @@ -25,9 +27,7 @@ from poli.core.util.external_observer import ExternalObserver from poli.core.util.inter_process_communication.process_wrapper import ProcessWrapper from poli.core.util.isolation.external_black_box import ExternalBlackBox -from poli.core.problem import Problem from poli.external_problem_factory_script import dynamically_instantiate - from poli.objective_repository import AVAILABLE_OBJECTIVES, AVAILABLE_PROBLEM_FACTORIES diff --git a/src/poli/objective_repository/__init__.py b/src/poli/objective_repository/__init__.py index 66debbe2..256c66e6 100644 --- a/src/poli/objective_repository/__init__.py +++ b/src/poli/objective_repository/__init__.py @@ -3,101 +3,28 @@ from pathlib import Path -# Toy examples -from .white_noise.register import WhiteNoiseProblemFactory, WhiteNoiseBlackBox -from .aloha.register import AlohaProblemFactory, AlohaBlackBox -from .toy_continuous_problem.register import ( - ToyContinuousProblemFactory, - ToyContinuousBlackBox, -) - -# Discrete toy examples -from .ehrlich.register import EhrlichBlackBox, EhrlichProblemFactory 
- -# Chemistry black boxes -from .dockstring.register import DockstringProblemFactory, DockstringBlackBox -from .drd3_docking.register import DRD3ProblemFactory, DRD3BlackBox -from .sa_tdc.register import SAProblemFactory, SABlackBox -from .rdkit_logp.register import LogPBlackBox, LogPProblemFactory -from .rdkit_qed.register import QEDBlackBox, QEDProblemFactory -from .gsk3_beta.register import GSK3BetaBlackBox, GSK3BetaProblemFactory -from .drd2_docking.register import DRD2BlackBox, DRD2ProblemFactory -from .jnk3.register import JNK3BlackBox, JNK3ProblemFactory -from .celecoxib_rediscovery.register import ( - CelecoxibRediscoveryBlackBox, - CelecoxibRediscoveryProblemFactory, -) -from .thiothixene_rediscovery.register import ( - ThiothixeneRediscoveryBlackBox, - ThiothixeneRediscoveryProblemFactory, -) -from .troglitazone_rediscovery.register import ( - TroglitazoneRediscoveryBlackBox, - TroglitazoneRediscoveryProblemFactory, -) from .albuterol_similarity.register import ( AlbuterolSimilarityBlackBox, AlbuterolSimilarityProblemFactory, ) -from .mestranol_similarity.register import ( - MestranolSimilarityBlackBox, - MestranolSimilarityProblemFactory, -) -from .amlodipine_mpo.register import ( - AmlodipineMPOProblemFactory, - AmlodipineMPOBlackBox, +from .aloha.register import AlohaBlackBox, AlohaProblemFactory +from .amlodipine_mpo.register import AmlodipineMPOBlackBox, AmlodipineMPOProblemFactory +from .celecoxib_rediscovery.register import ( + CelecoxibRediscoveryBlackBox, + CelecoxibRediscoveryProblemFactory, ) +from .deco_hop.register import DecoHopBlackBox, DecoHopProblemFactory + +# Chemistry black boxes +from .dockstring.register import DockstringBlackBox, DockstringProblemFactory +from .drd2_docking.register import DRD2BlackBox, DRD2ProblemFactory +from .drd3_docking.register import DRD3BlackBox, DRD3ProblemFactory + +# Discrete toy examples +from .ehrlich.register import EhrlichBlackBox, EhrlichProblemFactory from .fexofenadine_mpo.register import ( - 
FexofenadineMPOProblemFactory, FexofenadineMPOBlackBox, -) -from .osimetrinib_mpo.register import ( - OsimetrinibMPOProblemFactory, - OsimetrinibMPOBlackBox, -) -from .perindopril_mpo.register import ( - PerindoprilMPOProblemFactory, - PerindoprilMPOBlackBox, -) -from .ranolazine_mpo.register import ( - RanolazineMPOProblemFactory, - RanolazineMPOBlackBox, -) -from .sitagliptin_mpo.register import ( - SitagliptinMPOProblemFactory, - SitagliptinMPOBlackBox, -) -from .zaleplon_mpo.register import ( - ZaleplonMPOProblemFactory, - ZaleplonMPOBlackBox, -) -from .deco_hop.register import ( - DecoHopProblemFactory, - DecoHopBlackBox, -) -from .scaffold_hop.register import ( - ScaffoldHopProblemFactory, - ScaffoldHopBlackBox, -) -from .isomer_c7h8n2o2.register import ( - IsomerC7H8N2O2ProblemFactory, - IsomerC7H8N2O2BlackBox, -) -from .isomer_c9h10n2o2pf2cl.register import ( - IsomerC9H10N2O2PF2ClProblemFactory, - IsomerC9H10N2O2PF2ClBlackBox, -) -from .median_1.register import ( - Median1ProblemFactory, - Median1BlackBox, -) -from .median_2.register import ( - Median2ProblemFactory, - Median2BlackBox, -) -from .valsartan_smarts.register import ( - ValsartanSMARTSProblemFactory, - ValsartanSMARTSBlackBox, + FexofenadineMPOProblemFactory, ) # Protein black boxes @@ -113,21 +40,73 @@ ) from .gfp_cbas.register import GFPCBasBlackBox, GFPCBasProblemFactory from .gfp_select.register import GFPSelectionBlackBox, GFPSelectionProblemFactory +from .gsk3_beta.register import GSK3BetaBlackBox, GSK3BetaProblemFactory +from .isomer_c7h8n2o2.register import ( + IsomerC7H8N2O2BlackBox, + IsomerC7H8N2O2ProblemFactory, +) +from .isomer_c9h10n2o2pf2cl.register import ( + IsomerC9H10N2O2PF2ClBlackBox, + IsomerC9H10N2O2PF2ClProblemFactory, +) +from .jnk3.register import JNK3BlackBox, JNK3ProblemFactory +from .median_1.register import Median1BlackBox, Median1ProblemFactory +from .median_2.register import Median2BlackBox, Median2ProblemFactory +from .mestranol_similarity.register import ( + 
MestranolSimilarityBlackBox, + MestranolSimilarityProblemFactory, +) +from .osimetrinib_mpo.register import ( + OsimetrinibMPOBlackBox, + OsimetrinibMPOProblemFactory, +) from .penalized_logp_lambo.register import ( PenalizedLogPLamboBlackBox, PenalizedLogPLamboProblemFactory, ) +from .perindopril_mpo.register import ( + PerindoprilMPOBlackBox, + PerindoprilMPOProblemFactory, +) +from .ranolazine_mpo.register import RanolazineMPOBlackBox, RanolazineMPOProblemFactory from .rasp.register import RaspBlackBox, RaspProblemFactory +from .rdkit_logp.register import LogPBlackBox, LogPProblemFactory +from .rdkit_qed.register import QEDBlackBox, QEDProblemFactory from .rfp_foldx_stability_and_sasa.register import ( RFPFoldXStabilityAndSASAProblemFactory, ) +from .sa_tdc.register import SABlackBox, SAProblemFactory +from .scaffold_hop.register import ScaffoldHopBlackBox, ScaffoldHopProblemFactory +from .sitagliptin_mpo.register import ( + SitagliptinMPOBlackBox, + SitagliptinMPOProblemFactory, +) # Other from .super_mario_bros.register import ( - SuperMarioBrosProblemFactory, SuperMarioBrosBlackBox, + SuperMarioBrosProblemFactory, +) +from .thiothixene_rediscovery.register import ( + ThiothixeneRediscoveryBlackBox, + ThiothixeneRediscoveryProblemFactory, +) +from .toy_continuous_problem.register import ( + ToyContinuousBlackBox, + ToyContinuousProblemFactory, +) +from .troglitazone_rediscovery.register import ( + TroglitazoneRediscoveryBlackBox, + TroglitazoneRediscoveryProblemFactory, +) +from .valsartan_smarts.register import ( + ValsartanSMARTSBlackBox, + ValsartanSMARTSProblemFactory, ) +# Toy examples +from .white_noise.register import WhiteNoiseBlackBox, WhiteNoiseProblemFactory +from .zaleplon_mpo.register import ZaleplonMPOBlackBox, ZaleplonMPOProblemFactory THIS_DIR = Path(__file__).parent.resolve() diff --git a/src/poli/objective_repository/albuterol_similarity/register.py b/src/poli/objective_repository/albuterol_similarity/register.py index d6a671f6..86ee0b50 
100644 --- a/src/poli/objective_repository/albuterol_similarity/register.py +++ b/src/poli/objective_repository/albuterol_similarity/register.py @@ -20,20 +20,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.albuterol_similarity.information import ( albuterol_similarity_info, ) diff --git a/src/poli/objective_repository/aloha/register.py b/src/poli/objective_repository/aloha/register.py index 922e74f2..12e56b8c 100644 --- a/src/poli/objective_repository/aloha/register.py +++ b/src/poli/objective_repository/aloha/register.py @@ -10,17 +10,16 @@ environment.yml file in this folder). 
""" -from typing import Literal, Tuple from string import ascii_uppercase +from typing import Literal, Tuple import numpy as np from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_problem_factory import AbstractProblemFactory -from poli.core.problem_setup_information import ProblemSetupInformation from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem - +from poli.core.problem_setup_information import ProblemSetupInformation from poli.core.util.seeding import seed_python_numpy_and_torch diff --git a/src/poli/objective_repository/amlodipine_mpo/register.py b/src/poli/objective_repository/amlodipine_mpo/register.py index 94292c6f..1b7cb425 100644 --- a/src/poli/objective_repository/amlodipine_mpo/register.py +++ b/src/poli/objective_repository/amlodipine_mpo/register.py @@ -16,23 +16,15 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - -from poli.objective_repository.amlodipine_mpo.information import ( - amlodipine_mpo_info, -) +from poli.objective_repository.amlodipine_mpo.information import amlodipine_mpo_info class AmlodipineMPOBlackBox(TDCBlackBox): diff --git a/src/poli/objective_repository/celecoxib_rediscovery/register.py b/src/poli/objective_repository/celecoxib_rediscovery/register.py index aeed19df..26735616 100644 --- a/src/poli/objective_repository/celecoxib_rediscovery/register.py +++ b/src/poli/objective_repository/celecoxib_rediscovery/register.py @@ -18,21 +18,14 @@ from typing import Literal import numpy as np - import 
selfies as sf - from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.celecoxib_rediscovery.information import ( celecoxib_rediscovery_info, ) diff --git a/src/poli/objective_repository/deco_hop/register.py b/src/poli/objective_repository/deco_hop/register.py index 82df8285..9a1e4aea 100644 --- a/src/poli/objective_repository/deco_hop/register.py +++ b/src/poli/objective_repository/deco_hop/register.py @@ -18,20 +18,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.deco_hop.information import deco_hop_info diff --git a/src/poli/objective_repository/dockstring/isolated_function.py b/src/poli/objective_repository/dockstring/isolated_function.py index f87f4112..7e4e695f 100644 --- a/src/poli/objective_repository/dockstring/isolated_function.py +++ b/src/poli/objective_repository/dockstring/isolated_function.py @@ -1,19 +1,17 @@ -from typing import Tuple, Literal +from typing import Literal, Tuple import numpy as np +from dockstring import load_target from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_isolated_function import 
AbstractIsolatedFunction from poli.core.black_box_information import BlackBoxInformation -from poli.core.util.chemistry.string_to_molecule import translate_selfies_to_smiles from poli.core.registry import register_isolated_function - +from poli.core.util.chemistry.string_to_molecule import translate_selfies_to_smiles from poli.objective_repository.dockstring.information import ( dockstring_black_box_information, ) -from dockstring import load_target - class IsolatedDockstringFunction(AbstractIsolatedFunction): """ diff --git a/src/poli/objective_repository/dockstring/register.py b/src/poli/objective_repository/dockstring/register.py index 10dcc525..f62958fe 100644 --- a/src/poli/objective_repository/dockstring/register.py +++ b/src/poli/objective_repository/dockstring/register.py @@ -18,24 +18,18 @@ from typing import Literal import numpy as np - import selfies as sf - from poli.core.abstract_black_box import AbstractBlackBox -from poli.core.black_box_information import BlackBoxInformation from poli.core.abstract_problem_factory import AbstractProblemFactory +from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import ( translate_selfies_to_smiles, translate_smiles_to_selfies, ) - -from poli.core.util.seeding import seed_python_numpy_and_torch - from poli.core.util.isolation.instancing import get_inner_function - +from poli.core.util.seeding import seed_python_numpy_and_torch from poli.objective_repository.dockstring.information import ( dockstring_black_box_information, ) diff --git a/src/poli/objective_repository/drd2_docking/register.py b/src/poli/objective_repository/drd2_docking/register.py index 400cc3b1..6915c54e 100644 --- a/src/poli/objective_repository/drd2_docking/register.py +++ b/src/poli/objective_repository/drd2_docking/register.py @@ -19,19 +19,14 @@ from typing import Literal import numpy as np - import selfies as sf from 
poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation +from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.drd2_docking.information import drd2_docking_info diff --git a/src/poli/objective_repository/drd3_docking/__init__.py b/src/poli/objective_repository/drd3_docking/__init__.py index 6534bf2e..03c1c8f9 100644 --- a/src/poli/objective_repository/drd3_docking/__init__.py +++ b/src/poli/objective_repository/drd3_docking/__init__.py @@ -41,7 +41,7 @@ This can easily be done by running .. code-block:: bash - + # From the base of the poli repo conda env create --file src/poli/objective_repository/ddr3_docking/environment.yml @@ -49,10 +49,10 @@ We also need `lambo`'s tasks to be available in Python's path for `poli__lambo`: .. code-block:: bash - + git clone https://github.com/samuelstanton/lambo # For reference, we use 431b052 cd lambo - pip install -e . + pip install -e . 
In particular, we need diff --git a/src/poli/objective_repository/drd3_docking/register.py b/src/poli/objective_repository/drd3_docking/register.py index 8cfc2e01..ce9a2cd8 100644 --- a/src/poli/objective_repository/drd3_docking/register.py +++ b/src/poli/objective_repository/drd3_docking/register.py @@ -10,21 +10,15 @@ from typing import Literal import numpy as np - import selfies as sf - from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - -from poli.core.util.isolation.instancing import instance_function_as_isolated_process - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - +from poli.core.util.isolation.instancing import instance_function_as_isolated_process from poli.core.util.seeding import seed_numpy, seed_python - from poli.objective_repository.drd3_docking.information import drd3_docking_info diff --git a/src/poli/objective_repository/ehrlich/register.py b/src/poli/objective_repository/ehrlich/register.py index 70e89bb2..79d418dd 100644 --- a/src/poli/objective_repository/ehrlich/register.py +++ b/src/poli/objective_repository/ehrlich/register.py @@ -3,7 +3,7 @@ Ehrlich functions were proposed by Stanton et al. [1] as a quick-and-easy alternative for testing discrete sequence optimizers (with protein -optimization in mind). They are deviced to +optimization in mind). 
They are deviced to (i) be easy to query, (ii) have feasible and unfeasible sequences, @@ -29,19 +29,14 @@ from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_problem_factory import AbstractProblemFactory - from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem +from poli.core.util.proteins.defaults import AMINO_ACIDS +from poli.core.util.seeding import seed_python_numpy_and_torch from poli.objective_repository.ehrlich._construct_feasibility_matrix import ( _construct_transition_matrix, ) -from poli.objective_repository.ehrlich.information import ( - ehrlich_info, -) - -from poli.core.util.seeding import seed_python_numpy_and_torch - -from poli.core.util.proteins.defaults import AMINO_ACIDS +from poli.objective_repository.ehrlich.information import ehrlich_info class EhrlichBlackBox(AbstractBlackBox): diff --git a/src/poli/objective_repository/fexofenadine_mpo/register.py b/src/poli/objective_repository/fexofenadine_mpo/register.py index 5b40ca3d..c729c40a 100644 --- a/src/poli/objective_repository/fexofenadine_mpo/register.py +++ b/src/poli/objective_repository/fexofenadine_mpo/register.py @@ -17,20 +17,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.fexofenadine_mpo.information import fexofenadine_mpo_info diff --git a/src/poli/objective_repository/foldx_rfp_lambo/isolated_function.py b/src/poli/objective_repository/foldx_rfp_lambo/isolated_function.py index 1a28ef58..9b37e4a2 100644 --- 
a/src/poli/objective_repository/foldx_rfp_lambo/isolated_function.py +++ b/src/poli/objective_repository/foldx_rfp_lambo/isolated_function.py @@ -3,26 +3,24 @@ __author__ = "Simon Bartels" import logging -import yaml -from pathlib import Path import os from collections import namedtuple +from pathlib import Path -import numpy as np import hydra -from poli.core.abstract_isolated_function import AbstractIsolatedFunction -from poli.objective_repository.foldx_rfp_lambo import PROBLEM_SEQ, CORRECT_SEQ - -from poli.core.util.files.download_files_from_github import ( - download_file_from_github_repository, -) -from poli.core.registry import register_isolated_function - import lambo +import numpy as np +import yaml +from lambo import __file__ as project_root_file from lambo.tasks.proxy_rfp.proxy_rfp import ProxyRFPTask from lambo.utils import AMINO_ACIDS -from lambo import __file__ as project_root_file +from poli.core.abstract_isolated_function import AbstractIsolatedFunction +from poli.core.registry import register_isolated_function +from poli.core.util.files.download_files_from_github import ( + download_file_from_github_repository, +) +from poli.objective_repository.foldx_rfp_lambo import CORRECT_SEQ, PROBLEM_SEQ project_root = os.path.dirname(os.path.dirname(project_root_file)) LAMBO_IN_POLI_OBJECTIVES_PATH = Path.home() / ".poli_objectives" / "lambo" diff --git a/src/poli/objective_repository/foldx_rfp_lambo/register.py b/src/poli/objective_repository/foldx_rfp_lambo/register.py index 8be137b4..cdae6474 100644 --- a/src/poli/objective_repository/foldx_rfp_lambo/register.py +++ b/src/poli/objective_repository/foldx_rfp_lambo/register.py @@ -6,17 +6,15 @@ from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_problem_factory import AbstractProblemFactory -from poli.core.problem import Problem from poli.core.black_box_information import BlackBoxInformation from poli.core.exceptions import FoldXNotFoundException - -from 
poli.objective_repository.foldx_rfp_lambo import PROBLEM_SEQ, CORRECT_SEQ +from poli.core.problem import Problem from poli.core.util.isolation.instancing import instance_function_as_isolated_process from poli.core.util.seeding import seed_python_numpy_and_torch - +from poli.objective_repository.foldx_rfp_lambo import CORRECT_SEQ, PROBLEM_SEQ from poli.objective_repository.foldx_rfp_lambo.information import ( - foldx_rfp_lambo_information, AMINO_ACIDS, + foldx_rfp_lambo_information, ) diff --git a/src/poli/objective_repository/foldx_sasa/information.py b/src/poli/objective_repository/foldx_sasa/information.py index bfbcf334..0bf1f06d 100644 --- a/src/poli/objective_repository/foldx_sasa/information.py +++ b/src/poli/objective_repository/foldx_sasa/information.py @@ -1,7 +1,7 @@ import numpy as np -from poli.core.util.proteins.defaults import AMINO_ACIDS from poli.core.black_box_information import BlackBoxInformation +from poli.core.util.proteins.defaults import AMINO_ACIDS foldx_sasa_info = BlackBoxInformation( name="foldx_sasa", diff --git a/src/poli/objective_repository/foldx_sasa/isolated_function.py b/src/poli/objective_repository/foldx_sasa/isolated_function.py index 197ad648..0cf0a5d4 100644 --- a/src/poli/objective_repository/foldx_sasa/isolated_function.py +++ b/src/poli/objective_repository/foldx_sasa/isolated_function.py @@ -3,13 +3,10 @@ import numpy as np +from poli.core.proteins.foldx_isolated_function import FoldxIsolatedFunction from poli.core.registry import register_isolated_function - -from poli.core.util.proteins.mutations import ( - find_closest_wildtype_pdb_file_to_mutant, -) from poli.core.util.proteins.foldx import FoldxInterface -from poli.core.proteins.foldx_isolated_function import FoldxIsolatedFunction +from poli.core.util.proteins.mutations import find_closest_wildtype_pdb_file_to_mutant class FoldXSASAIsolatedLogic(FoldxIsolatedFunction): diff --git a/src/poli/objective_repository/foldx_sasa/register.py 
b/src/poli/objective_repository/foldx_sasa/register.py index 0c5dd41e..72e8582e 100644 --- a/src/poli/objective_repository/foldx_sasa/register.py +++ b/src/poli/objective_repository/foldx_sasa/register.py @@ -10,7 +10,7 @@ Nucleic acids research, 33(suppl_2), W382-W388. [2] Cock PA, Antao T, Chang JT, Chapman BA, Cox CJ, Dalke A, Friedberg I, Hamelryck T, Kauff F, Wilczynski B and de Hoon MJL (2009) Biopython: - freely available Python tools for computational molecular biology and + freely available Python tools for computational molecular biology and bioinformatics. Bioinformatics, 25, 1422-1423 """ @@ -22,13 +22,10 @@ from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation -from poli.core.problem import Problem from poli.core.exceptions import FoldXNotFoundException - +from poli.core.problem import Problem from poli.core.util.isolation.instancing import get_inner_function - from poli.core.util.seeding import seed_python_numpy_and_torch - from poli.objective_repository.foldx_sasa.information import foldx_sasa_info diff --git a/src/poli/objective_repository/foldx_stability/information.py b/src/poli/objective_repository/foldx_stability/information.py index 13da23be..f805bb0b 100644 --- a/src/poli/objective_repository/foldx_stability/information.py +++ b/src/poli/objective_repository/foldx_stability/information.py @@ -1,7 +1,7 @@ import numpy as np -from poli.core.util.proteins.defaults import AMINO_ACIDS from poli.core.black_box_information import BlackBoxInformation +from poli.core.util.proteins.defaults import AMINO_ACIDS foldx_stability_info = BlackBoxInformation( name="foldx_stability", diff --git a/src/poli/objective_repository/foldx_stability/isolated_function.py b/src/poli/objective_repository/foldx_stability/isolated_function.py index 8ecb63af..03f2d8c4 100644 --- 
a/src/poli/objective_repository/foldx_stability/isolated_function.py +++ b/src/poli/objective_repository/foldx_stability/isolated_function.py @@ -3,13 +3,10 @@ import numpy as np +from poli.core.proteins.foldx_isolated_function import FoldxIsolatedFunction from poli.core.registry import register_isolated_function - -from poli.core.util.proteins.mutations import ( - find_closest_wildtype_pdb_file_to_mutant, -) from poli.core.util.proteins.foldx import FoldxInterface -from poli.core.proteins.foldx_isolated_function import FoldxIsolatedFunction +from poli.core.util.proteins.mutations import find_closest_wildtype_pdb_file_to_mutant class FoldXStabilityIsolatedLogic(FoldxIsolatedFunction): diff --git a/src/poli/objective_repository/foldx_stability/register.py b/src/poli/objective_repository/foldx_stability/register.py index 5cfe0644..4194e404 100644 --- a/src/poli/objective_repository/foldx_stability/register.py +++ b/src/poli/objective_repository/foldx_stability/register.py @@ -2,7 +2,7 @@ This script registers the stability FoldX black box and objective factory. FoldX [1] is a simulator that allows for computing the difference -in free energy between a wildtype protein and a mutated protein. We +in free energy between a wildtype protein and a mutated protein. We also use biopython for pre-processing the PDB files [2]. [1] Schymkowitz, J., Borg, J., Stricher, F., Nys, R., Rousseau, F., @@ -10,7 +10,7 @@ Nucleic acids research, 33(suppl_2), W382-W388. [2] Cock PA, Antao T, Chang JT, Chapman BA, Cox CJ, Dalke A, Friedberg I, Hamelryck T, Kauff F, Wilczynski B and de Hoon MJL (2009) Biopython: - freely available Python tools for computational molecular biology and + freely available Python tools for computational molecular biology and bioinformatics. 
Bioinformatics, 25, 1422-1423 """ @@ -21,16 +21,13 @@ import numpy as np +from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation -from poli.core.problem import Problem -from poli.core.abstract_black_box import AbstractBlackBox from poli.core.exceptions import FoldXNotFoundException - -from poli.core.util.seeding import seed_python_numpy_and_torch - +from poli.core.problem import Problem from poli.core.util.isolation.instancing import get_inner_function - +from poli.core.util.seeding import seed_python_numpy_and_torch from poli.objective_repository.foldx_stability.information import foldx_stability_info diff --git a/src/poli/objective_repository/foldx_stability_and_sasa/information.py b/src/poli/objective_repository/foldx_stability_and_sasa/information.py index 8ecca5d6..1942ae88 100644 --- a/src/poli/objective_repository/foldx_stability_and_sasa/information.py +++ b/src/poli/objective_repository/foldx_stability_and_sasa/information.py @@ -1,7 +1,7 @@ import numpy as np -from poli.core.util.proteins.defaults import AMINO_ACIDS from poli.core.black_box_information import BlackBoxInformation +from poli.core.util.proteins.defaults import AMINO_ACIDS foldx_stability_and_sasa_info = BlackBoxInformation( name="foldx_stability_and_sasa", diff --git a/src/poli/objective_repository/foldx_stability_and_sasa/isolated_function.py b/src/poli/objective_repository/foldx_stability_and_sasa/isolated_function.py index d1309f68..36f5fbd0 100644 --- a/src/poli/objective_repository/foldx_stability_and_sasa/isolated_function.py +++ b/src/poli/objective_repository/foldx_stability_and_sasa/isolated_function.py @@ -3,13 +3,10 @@ import numpy as np +from poli.core.proteins.foldx_isolated_function import FoldxIsolatedFunction from poli.core.registry import register_isolated_function - -from poli.core.util.proteins.mutations import ( - 
find_closest_wildtype_pdb_file_to_mutant, -) from poli.core.util.proteins.foldx import FoldxInterface -from poli.core.proteins.foldx_isolated_function import FoldxIsolatedFunction +from poli.core.util.proteins.mutations import find_closest_wildtype_pdb_file_to_mutant class FoldXStabilitityAndSASAIsolatedLogic(FoldxIsolatedFunction): diff --git a/src/poli/objective_repository/foldx_stability_and_sasa/register.py b/src/poli/objective_repository/foldx_stability_and_sasa/register.py index a69acc65..5efe0d39 100644 --- a/src/poli/objective_repository/foldx_stability_and_sasa/register.py +++ b/src/poli/objective_repository/foldx_stability_and_sasa/register.py @@ -12,7 +12,7 @@ Nucleic acids research, 33(suppl_2), W382-W388. [2] Cock PA, Antao T, Chang JT, Chapman BA, Cox CJ, Dalke A, Friedberg I, Hamelryck T, Kauff F, Wilczynski B and de Hoon MJL (2009) Biopython: - freely available Python tools for computational molecular biology and + freely available Python tools for computational molecular biology and bioinformatics. 
Bioinformatics, 25, 1422-1423 """ @@ -22,16 +22,12 @@ import numpy as np from poli.core.abstract_black_box import AbstractBlackBox -from poli.core.black_box_information import BlackBoxInformation -from poli.core.problem import Problem from poli.core.abstract_problem_factory import AbstractProblemFactory +from poli.core.black_box_information import BlackBoxInformation from poli.core.exceptions import FoldXNotFoundException - - -from poli.core.util.seeding import seed_python_numpy_and_torch - +from poli.core.problem import Problem from poli.core.util.isolation.instancing import get_inner_function - +from poli.core.util.seeding import seed_python_numpy_and_torch from poli.objective_repository.foldx_stability_and_sasa.information import ( foldx_stability_and_sasa_info, ) diff --git a/src/poli/objective_repository/gfp_cbas/cbas_alphabet_preprocessing.py b/src/poli/objective_repository/gfp_cbas/cbas_alphabet_preprocessing.py index a11d9b85..54cd711d 100644 --- a/src/poli/objective_repository/gfp_cbas/cbas_alphabet_preprocessing.py +++ b/src/poli/objective_repository/gfp_cbas/cbas_alphabet_preprocessing.py @@ -1,10 +1,10 @@ import os.path + import numpy as np import pandas as pd from .information import AA - AA_IDX = {AA[i]: i for i in range(len(AA))} BLOSUM = np.array( diff --git a/src/poli/objective_repository/gfp_cbas/gfp_gp.py b/src/poli/objective_repository/gfp_cbas/gfp_gp.py index 8bcdd40e..13d60994 100644 --- a/src/poli/objective_repository/gfp_cbas/gfp_gp.py +++ b/src/poli/objective_repository/gfp_cbas/gfp_gp.py @@ -7,9 +7,11 @@ import warnings warnings.filterwarnings("ignore") -from typing import Tuple from pathlib import Path +from typing import Tuple + import numpy as np + from poli.objective_repository.gfp_cbas import BLOSUM diff --git a/src/poli/objective_repository/gfp_cbas/isolated_function.py b/src/poli/objective_repository/gfp_cbas/isolated_function.py index b58e76bc..84a25bf5 100644 --- a/src/poli/objective_repository/gfp_cbas/isolated_function.py +++ 
b/src/poli/objective_repository/gfp_cbas/isolated_function.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import List, Tuple, Literal +from typing import List, Literal, Tuple from warnings import warn import numpy as np @@ -8,7 +8,6 @@ from poli.core.abstract_isolated_function import AbstractIsolatedFunction from poli.core.black_box_information import BlackBoxInformation - from poli.objective_repository.gfp_cbas.cbas_alphabet_preprocessing import ( convert_aas_to_idx_array, one_hot_encode_aa_array, diff --git a/src/poli/objective_repository/gfp_cbas/register.py b/src/poli/objective_repository/gfp_cbas/register.py index 7416336b..057651e8 100644 --- a/src/poli/objective_repository/gfp_cbas/register.py +++ b/src/poli/objective_repository/gfp_cbas/register.py @@ -1,4 +1,4 @@ -from typing import Tuple, Literal +from typing import Literal, Tuple from warnings import warn import numpy as np @@ -7,13 +7,10 @@ from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem - +from poli.core.util.isolation.instancing import get_inner_function from poli.core.util.seeding import seed_python_numpy_and_torch - from poli.objective_repository.gfp_cbas.information import gfp_cbas_info -from poli.core.util.isolation.instancing import get_inner_function - class GFPCBasBlackBox(AbstractBlackBox): def __init__( diff --git a/src/poli/objective_repository/gfp_select/information.py b/src/poli/objective_repository/gfp_select/information.py index a56bde31..6f1c65c0 100644 --- a/src/poli/objective_repository/gfp_select/information.py +++ b/src/poli/objective_repository/gfp_select/information.py @@ -1,5 +1,4 @@ from poli.core.black_box_information import BlackBoxInformation - from poli.core.util.proteins.defaults import AMINO_ACIDS gfp_select_info = BlackBoxInformation( diff --git a/src/poli/objective_repository/gfp_select/isolated_function.py 
b/src/poli/objective_repository/gfp_select/isolated_function.py index 32d68251..7781a91f 100644 --- a/src/poli/objective_repository/gfp_select/isolated_function.py +++ b/src/poli/objective_repository/gfp_select/isolated_function.py @@ -1,5 +1,6 @@ from pathlib import Path from typing import Tuple + import numpy as np import pandas as pd diff --git a/src/poli/objective_repository/gfp_select/register.py b/src/poli/objective_repository/gfp_select/register.py index efdcca91..8d416289 100644 --- a/src/poli/objective_repository/gfp_select/register.py +++ b/src/poli/objective_repository/gfp_select/register.py @@ -1,14 +1,13 @@ from typing import Tuple + import numpy as np from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem -from poli.core.util.seeding import seed_python_numpy_and_torch - from poli.core.util.isolation.instancing import instance_function_as_isolated_process - +from poli.core.util.seeding import seed_python_numpy_and_torch from poli.objective_repository.gfp_select.information import gfp_select_info diff --git a/src/poli/objective_repository/gsk3_beta/register.py b/src/poli/objective_repository/gsk3_beta/register.py index 15efc282..3c9260e6 100644 --- a/src/poli/objective_repository/gsk3_beta/register.py +++ b/src/poli/objective_repository/gsk3_beta/register.py @@ -24,18 +24,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - from poli.objective_repository.gsk3_beta.information import 
gsk3_beta_info diff --git a/src/poli/objective_repository/isomer_c7h8n2o2/register.py b/src/poli/objective_repository/isomer_c7h8n2o2/register.py index 9c72373b..87a187ec 100644 --- a/src/poli/objective_repository/isomer_c7h8n2o2/register.py +++ b/src/poli/objective_repository/isomer_c7h8n2o2/register.py @@ -16,20 +16,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.isomer_c7h8n2o2.information import isomer_c7h8n2o2_info diff --git a/src/poli/objective_repository/isomer_c9h10n2o2pf2cl/register.py b/src/poli/objective_repository/isomer_c9h10n2o2pf2cl/register.py index 466ced67..de754851 100644 --- a/src/poli/objective_repository/isomer_c9h10n2o2pf2cl/register.py +++ b/src/poli/objective_repository/isomer_c9h10n2o2pf2cl/register.py @@ -17,20 +17,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.isomer_c9h10n2o2pf2cl.information import ( isomer_c9h10n2o2pf2cl_info, ) diff --git a/src/poli/objective_repository/jnk3/register.py b/src/poli/objective_repository/jnk3/register.py index 
aba63f05..eef6317a 100644 --- a/src/poli/objective_repository/jnk3/register.py +++ b/src/poli/objective_repository/jnk3/register.py @@ -23,18 +23,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - from poli.objective_repository.jnk3.information import jnk3_info diff --git a/src/poli/objective_repository/median_1/register.py b/src/poli/objective_repository/median_1/register.py index f3aa6a07..9bdca95f 100644 --- a/src/poli/objective_repository/median_1/register.py +++ b/src/poli/objective_repository/median_1/register.py @@ -17,20 +17,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.median_1.information import median_1_info diff --git a/src/poli/objective_repository/median_2/register.py b/src/poli/objective_repository/median_2/register.py index 4278d206..2e33d5a3 100644 --- a/src/poli/objective_repository/median_2/register.py +++ b/src/poli/objective_repository/median_2/register.py @@ -16,20 +16,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import 
BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.median_2.information import median_2_info diff --git a/src/poli/objective_repository/mestranol_similarity/register.py b/src/poli/objective_repository/mestranol_similarity/register.py index 7584d9c5..48212d0e 100644 --- a/src/poli/objective_repository/mestranol_similarity/register.py +++ b/src/poli/objective_repository/mestranol_similarity/register.py @@ -20,20 +20,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.mestranol_similarity.information import ( mestranol_similarity_info, ) diff --git a/src/poli/objective_repository/osimetrinib_mpo/register.py b/src/poli/objective_repository/osimetrinib_mpo/register.py index c8eb49bd..35dda2e2 100644 --- a/src/poli/objective_repository/osimetrinib_mpo/register.py +++ b/src/poli/objective_repository/osimetrinib_mpo/register.py @@ -16,20 +16,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from 
poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.osimetrinib_mpo.information import osimetrinib_mpo_info diff --git a/src/poli/objective_repository/penalized_logp_lambo/isolated_function.py b/src/poli/objective_repository/penalized_logp_lambo/isolated_function.py index 2648d8e0..4bee9f4f 100644 --- a/src/poli/objective_repository/penalized_logp_lambo/isolated_function.py +++ b/src/poli/objective_repository/penalized_logp_lambo/isolated_function.py @@ -13,22 +13,19 @@ import logging import os -import numpy as np from pathlib import Path import lambo -from lambo.tasks.chem.logp import logP +import numpy as np from lambo import __file__ as project_root_file +from lambo.tasks.chem.logp import logP from poli.core.abstract_isolated_function import AbstractIsolatedFunction - from poli.core.util.chemistry.string_to_molecule import translate_selfies_to_smiles - from poli.core.util.files.download_files_from_github import ( download_file_from_github_repository, ) - project_root = os.path.dirname(os.path.dirname(project_root_file)) LAMBO_IN_POLI_OBJECTIVES_PATH = Path.home() / ".poli_objectives" / "lambo" LAMBO_IN_POLI_OBJECTIVES_PATH.mkdir(parents=True, exist_ok=True) diff --git a/src/poli/objective_repository/penalized_logp_lambo/register.py b/src/poli/objective_repository/penalized_logp_lambo/register.py index c36b2e25..9505cfb0 100644 --- a/src/poli/objective_repository/penalized_logp_lambo/register.py +++ b/src/poli/objective_repository/penalized_logp_lambo/register.py @@ -11,18 +11,16 @@ arXiv, July 12, 2022. http://arxiv.org/abs/2203.12742. 
""" -from typing import Tuple, Literal +from typing import Literal, Tuple + import numpy as np from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem - from poli.core.util.isolation.instancing import get_inner_function - from poli.core.util.seeding import seed_python_numpy_and_torch - from poli.objective_repository.penalized_logp_lambo.information import ( penalized_logp_lambo_info, ) diff --git a/src/poli/objective_repository/perindopril_mpo/register.py b/src/poli/objective_repository/perindopril_mpo/register.py index 3c3a4027..db61fe85 100644 --- a/src/poli/objective_repository/perindopril_mpo/register.py +++ b/src/poli/objective_repository/perindopril_mpo/register.py @@ -16,20 +16,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.perindopril_mpo.information import perindopril_mpo_info diff --git a/src/poli/objective_repository/ranolazine_mpo/register.py b/src/poli/objective_repository/ranolazine_mpo/register.py index bae99c00..9144861b 100644 --- a/src/poli/objective_repository/ranolazine_mpo/register.py +++ b/src/poli/objective_repository/ranolazine_mpo/register.py @@ -16,20 +16,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from 
poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.ranolazine_mpo.information import ranolazine_mpo_info diff --git a/src/poli/objective_repository/rasp/information.py b/src/poli/objective_repository/rasp/information.py index 8c9bf7e0..d982ae13 100644 --- a/src/poli/objective_repository/rasp/information.py +++ b/src/poli/objective_repository/rasp/information.py @@ -1,7 +1,6 @@ import numpy as np from poli.core.black_box_information import BlackBoxInformation - from poli.core.util.proteins.defaults import AMINO_ACIDS rasp_information = BlackBoxInformation( diff --git a/src/poli/objective_repository/rasp/isolated_function.py b/src/poli/objective_repository/rasp/isolated_function.py index e572f710..6c39fff8 100644 --- a/src/poli/objective_repository/rasp/isolated_function.py +++ b/src/poli/objective_repository/rasp/isolated_function.py @@ -17,26 +17,24 @@ """ -from typing import Union, List +from collections import defaultdict from pathlib import Path -from uuid import uuid4 from time import time -from collections import defaultdict - -from poli.core.abstract_isolated_function import AbstractIsolatedFunction +from typing import List, Union +from uuid import uuid4 -from poli.core.util.proteins.rasp import ( - RaspInterface, - load_cavity_and_downstream_models, -) +import numpy as np +from poli.core.abstract_isolated_function import AbstractIsolatedFunction +from poli.core.util.proteins.mutations import find_closest_wildtype_pdb_file_to_mutant from poli.core.util.proteins.pdb_parsing import ( parse_pdb_as_residue_strings, parse_pdb_as_residues, ) -from poli.core.util.proteins.mutations import find_closest_wildtype_pdb_file_to_mutant - -import numpy as np +from poli.core.util.proteins.rasp 
import ( + RaspInterface, + load_cavity_and_downstream_models, +) RASP_NUM_ENSEMBLE = 10 RASP_DEVICE = "cpu" diff --git a/src/poli/objective_repository/rasp/register.py b/src/poli/objective_repository/rasp/register.py index 8f0e58ae..fa918f61 100644 --- a/src/poli/objective_repository/rasp/register.py +++ b/src/poli/objective_repository/rasp/register.py @@ -17,24 +17,20 @@ """ -from typing import Union, List from pathlib import Path +from typing import List, Union + +import numpy as np from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem - -from poli.core.util.seeding import seed_python_numpy_and_torch - from poli.core.util.isolation.instancing import get_inner_function - +from poli.core.util.seeding import seed_python_numpy_and_torch from poli.objective_repository.rasp.information import rasp_information -import numpy as np - - class RaspBlackBox(AbstractBlackBox): """ RaSP Black Box implementation. diff --git a/src/poli/objective_repository/rdkit_logp/register.py b/src/poli/objective_repository/rdkit_logp/register.py index 3fb1cc8a..d26053fb 100644 --- a/src/poli/objective_repository/rdkit_logp/register.py +++ b/src/poli/objective_repository/rdkit_logp/register.py @@ -9,21 +9,17 @@ strings. 
""" -from typing import Tuple, Literal +from typing import Literal, Tuple import numpy as np - from rdkit.Chem import Descriptors from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import strings_to_molecules - from poli.core.util.seeding import seed_python_numpy_and_torch - from poli.objective_repository.rdkit_logp.information import rdkit_logp_info diff --git a/src/poli/objective_repository/rdkit_qed/register.py b/src/poli/objective_repository/rdkit_qed/register.py index 3b236247..a4c4a183 100644 --- a/src/poli/objective_repository/rdkit_qed/register.py +++ b/src/poli/objective_repository/rdkit_qed/register.py @@ -10,22 +10,18 @@ 0 and 1. We allow for both SMILES and SELFIES strings. """ -from typing import Tuple, List, Literal +from typing import List, Literal, Tuple import numpy as np - from rdkit.Chem.QED import qed from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_problem_factory import AbstractProblemFactory -from poli.core.problem_setup_information import ProblemSetupInformation from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem - +from poli.core.problem_setup_information import ProblemSetupInformation from poli.core.util.chemistry.string_to_molecule import strings_to_molecules - from poli.core.util.seeding import seed_python_numpy_and_torch - from poli.objective_repository.rdkit_qed.information import rdkit_qed_info diff --git a/src/poli/objective_repository/rfp_foldx_stability_and_sasa/information.py b/src/poli/objective_repository/rfp_foldx_stability_and_sasa/information.py index 15ac8f77..a353df2f 100644 --- a/src/poli/objective_repository/rfp_foldx_stability_and_sasa/information.py +++ 
b/src/poli/objective_repository/rfp_foldx_stability_and_sasa/information.py @@ -1,7 +1,7 @@ import numpy as np -from poli.core.util.proteins.defaults import AMINO_ACIDS from poli.core.black_box_information import BlackBoxInformation +from poli.core.util.proteins.defaults import AMINO_ACIDS rfp_foldx_stability_and_sasa_info = BlackBoxInformation( name="rfp_foldx_stability_and_sasa", diff --git a/src/poli/objective_repository/rfp_foldx_stability_and_sasa/register.py b/src/poli/objective_repository/rfp_foldx_stability_and_sasa/register.py index a2e7c05a..9b1ff5a9 100644 --- a/src/poli/objective_repository/rfp_foldx_stability_and_sasa/register.py +++ b/src/poli/objective_repository/rfp_foldx_stability_and_sasa/register.py @@ -12,23 +12,20 @@ Nucleic acids research, 33(suppl_2), W382-W388. [2] Cock PA, Antao T, Chang JT, Chapman BA, Cox CJ, Dalke A, Friedberg I, Hamelryck T, Kauff F, Wilczynski B and de Hoon MJL (2009) Biopython: - freely available Python tools for computational molecular biology and + freely available Python tools for computational molecular biology and bioinformatics. 
Bioinformatics, 25, 1422-1423 """ +import warnings from pathlib import Path from typing import List, Union -import warnings import numpy as np from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem - from poli.core.util.seeding import seed_numpy, seed_python - - from poli.objective_repository.foldx_stability_and_sasa.register import ( FoldXStabilityAndSASABlackBox, ) diff --git a/src/poli/objective_repository/sa_tdc/register.py b/src/poli/objective_repository/sa_tdc/register.py index eca14630..2891c4e2 100644 --- a/src/poli/objective_repository/sa_tdc/register.py +++ b/src/poli/objective_repository/sa_tdc/register.py @@ -10,20 +10,15 @@ from typing import Literal import numpy as np - import selfies as sf -from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation +from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - -from poli.core.util.seeding import seed_python_numpy_and_torch - from poli.core.util.isolation.instancing import instance_function_as_isolated_process - +from poli.core.util.seeding import seed_python_numpy_and_torch from poli.objective_repository.sa_tdc.information import sa_tdc_info diff --git a/src/poli/objective_repository/scaffold_hop/register.py b/src/poli/objective_repository/scaffold_hop/register.py index cf17359e..e2e25f34 100644 --- a/src/poli/objective_repository/scaffold_hop/register.py +++ b/src/poli/objective_repository/scaffold_hop/register.py @@ -16,20 +16,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation 
from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.scaffold_hop.information import scaffold_hop_info diff --git a/src/poli/objective_repository/sitagliptin_mpo/register.py b/src/poli/objective_repository/sitagliptin_mpo/register.py index 54e8ed1a..a4eda1bc 100644 --- a/src/poli/objective_repository/sitagliptin_mpo/register.py +++ b/src/poli/objective_repository/sitagliptin_mpo/register.py @@ -16,20 +16,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.sitagliptin_mpo.information import sitagliptin_mpo_info diff --git a/src/poli/objective_repository/super_mario_bros/isolated_function.py b/src/poli/objective_repository/super_mario_bros/isolated_function.py index 1105fc7a..3a8c4b67 100644 --- a/src/poli/objective_repository/super_mario_bros/isolated_function.py +++ b/src/poli/objective_repository/super_mario_bros/isolated_function.py @@ -7,22 +7,17 @@ """ -from typing import List from pathlib import Path +from typing import List import numpy as np from poli.core.abstract_isolated_function import AbstractIsolatedFunction - +from poli.objective_repository.super_mario_bros.information import smb_info from poli.objective_repository.super_mario_bros.simulator import ( test_level_from_int_array, ) -from 
poli.objective_repository.super_mario_bros.information import ( - smb_info, -) - - THIS_DIR = Path(__file__).parent.resolve() # TODO: download the simulator from the internet diff --git a/src/poli/objective_repository/super_mario_bros/level_utils.py b/src/poli/objective_repository/super_mario_bros/level_utils.py index 3f266762..373d66fb 100644 --- a/src/poli/objective_repository/super_mario_bros/level_utils.py +++ b/src/poli/objective_repository/super_mario_bros/level_utils.py @@ -1,7 +1,7 @@ """Utilities for transforming levels to arrays and back.""" -from typing import List from itertools import product +from typing import List import numpy as np diff --git a/src/poli/objective_repository/super_mario_bros/register.py b/src/poli/objective_repository/super_mario_bros/register.py index 6adca07d..0d7cec22 100644 --- a/src/poli/objective_repository/super_mario_bros/register.py +++ b/src/poli/objective_repository/super_mario_bros/register.py @@ -7,8 +7,8 @@ """ -from typing import Tuple from pathlib import Path +from typing import Tuple import numpy as np @@ -16,11 +16,8 @@ from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem - -from poli.core.util.seeding import seed_python_numpy_and_torch - from poli.core.util.isolation.instancing import get_inner_function - +from poli.core.util.seeding import seed_python_numpy_and_torch from poli.objective_repository.super_mario_bros.information import smb_info THIS_DIR = Path(__file__).parent.resolve() diff --git a/src/poli/objective_repository/super_mario_bros/simulator.py b/src/poli/objective_repository/super_mario_bros/simulator.py index abd86a79..8f0b101a 100644 --- a/src/poli/objective_repository/super_mario_bros/simulator.py +++ b/src/poli/objective_repository/super_mario_bros/simulator.py @@ -4,15 +4,13 @@ When ran, it lets a human play a level. 
""" -import subprocess import json +import subprocess from pathlib import Path import numpy as np - from level_utils import clean_level - filepath = Path(__file__).parent.resolve() JARFILE_PATH = f"{filepath}/simulator.jar" diff --git a/src/poli/objective_repository/thiothixene_rediscovery/register.py b/src/poli/objective_repository/thiothixene_rediscovery/register.py index 202ade66..b820d4e3 100644 --- a/src/poli/objective_repository/thiothixene_rediscovery/register.py +++ b/src/poli/objective_repository/thiothixene_rediscovery/register.py @@ -18,20 +18,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.thiothixene_rediscovery.information import ( thiothixene_rediscovery_info, ) diff --git a/src/poli/objective_repository/toy_continuous_problem/register.py b/src/poli/objective_repository/toy_continuous_problem/register.py index 8acb3e37..e021519c 100644 --- a/src/poli/objective_repository/toy_continuous_problem/register.py +++ b/src/poli/objective_repository/toy_continuous_problem/register.py @@ -20,18 +20,16 @@ from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem - from poli.core.util.seeding import seed_python_numpy_and_torch +from poli.objective_repository.toy_continuous_problem.information import ( + toy_continuous_info, +) from .toy_continuous_problem import ( POSSIBLE_FUNCTIONS, - ToyContinuousProblem, - TWO_DIMENSIONAL_PROBLEMS, SIX_DIMENSIONAL_PROBLEMS, 
-) - -from poli.objective_repository.toy_continuous_problem.information import ( - toy_continuous_info, + TWO_DIMENSIONAL_PROBLEMS, + ToyContinuousProblem, ) diff --git a/src/poli/objective_repository/toy_continuous_problem/toy_continuous_problem.py b/src/poli/objective_repository/toy_continuous_problem/toy_continuous_problem.py index d98e9258..04d132ac 100644 --- a/src/poli/objective_repository/toy_continuous_problem/toy_continuous_problem.py +++ b/src/poli/objective_repository/toy_continuous_problem/toy_continuous_problem.py @@ -5,31 +5,31 @@ https://en.wikipedia.org/wiki/Test_functions_for_optimization """ -from typing import Literal, List +from typing import List, Literal import numpy as np from .definitions import ( - easom, - cross_in_tray, - shifted_sphere, - egg_holder, ackley_function_01, alpine_01, alpine_02, bent_cigar, + branin_2d, brown, + camelback_2d, chung_reynolds, cosine_mixture, + cross_in_tray, deb_01, deb_02, deflected_corrugated_spring, - camelback_2d, - styblinski_tang, + easom, + egg_holder, hartmann_6d, - branin_2d, - rosenbrock, levy, + rosenbrock, + shifted_sphere, + styblinski_tang, ) # Notice: these will be used by pytest to test the diff --git a/src/poli/objective_repository/troglitazone_rediscovery/register.py b/src/poli/objective_repository/troglitazone_rediscovery/register.py index 227fdbaf..e44519ca 100644 --- a/src/poli/objective_repository/troglitazone_rediscovery/register.py +++ b/src/poli/objective_repository/troglitazone_rediscovery/register.py @@ -17,20 +17,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from 
poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.troglitazone_rediscovery.information import ( troglitazone_rediscovery_info, ) diff --git a/src/poli/objective_repository/valsartan_smarts/register.py b/src/poli/objective_repository/valsartan_smarts/register.py index d3ebabc1..01bd49cb 100644 --- a/src/poli/objective_repository/valsartan_smarts/register.py +++ b/src/poli/objective_repository/valsartan_smarts/register.py @@ -17,23 +17,15 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - -from poli.objective_repository.valsartan_smarts.information import ( - valsartan_smarts_info, -) +from poli.objective_repository.valsartan_smarts.information import valsartan_smarts_info class ValsartanSMARTSBlackBox(TDCBlackBox): diff --git a/src/poli/objective_repository/white_noise/register.py b/src/poli/objective_repository/white_noise/register.py index 1bc98d46..612d7eeb 100644 --- a/src/poli/objective_repository/white_noise/register.py +++ b/src/poli/objective_repository/white_noise/register.py @@ -11,13 +11,11 @@ import numpy as np +from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_problem_factory import AbstractProblemFactory +from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem -from poli.core.abstract_black_box import AbstractBlackBox - from poli.core.problem_setup_information import ProblemSetupInformation -from poli.core.black_box_information import BlackBoxInformation - from poli.core.util.seeding 
import seed_python_numpy_and_torch diff --git a/src/poli/objective_repository/zaleplon_mpo/register.py b/src/poli/objective_repository/zaleplon_mpo/register.py index 6f4910f7..d51526e3 100644 --- a/src/poli/objective_repository/zaleplon_mpo/register.py +++ b/src/poli/objective_repository/zaleplon_mpo/register.py @@ -16,20 +16,14 @@ from typing import Literal import numpy as np - import selfies as sf from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem - from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies - from poli.core.util.seeding import seed_numpy, seed_python - -from poli.core.chemistry.tdc_black_box import TDCBlackBox - from poli.objective_repository.zaleplon_mpo.information import zaleplon_mpo_info diff --git a/src/poli/tests/docs_examples/test_objective_functions.py b/src/poli/tests/docs_examples/test_objective_functions.py index 878709f6..e9832522 100644 --- a/src/poli/tests/docs_examples/test_objective_functions.py +++ b/src/poli/tests/docs_examples/test_objective_functions.py @@ -9,7 +9,8 @@ def test_white_noise_example(): import numpy as np - from poli.objective_repository import WhiteNoiseProblemFactory, WhiteNoiseBlackBox + + from poli.objective_repository import WhiteNoiseBlackBox, WhiteNoiseProblemFactory # Creating the black box f = WhiteNoiseBlackBox() @@ -27,7 +28,8 @@ def test_white_noise_example(): def test_aloha_example(): import numpy as np - from poli.objective_repository import AlohaProblemFactory, AlohaBlackBox + + from poli.objective_repository import AlohaBlackBox, AlohaProblemFactory # Creating the black box f = AlohaBlackBox() @@ -52,6 +54,7 @@ def test_aloha_example(): def test_toy_continuous_example(): import numpy as np + from poli.objective_repository import ( ToyContinuousBlackBox, ToyContinuousProblemFactory, @@ -87,7 +90,8 
@@ def test_toy_continuous_example(): def test_qed_example(): import numpy as np - from poli.objective_repository import QEDProblemFactory, QEDBlackBox + + from poli.objective_repository import QEDBlackBox, QEDProblemFactory # Creating the black box f = QEDBlackBox(string_representation="SELFIES") @@ -107,7 +111,8 @@ def test_qed_example(): def test_logp_example(): import numpy as np - from poli.objective_repository import LogPProblemFactory, LogPBlackBox + + from poli.objective_repository import LogPBlackBox, LogPProblemFactory # Creating the black box f = LogPBlackBox(string_representation="SMILES") @@ -127,7 +132,8 @@ def test_logp_example(): def test_dockstring_example(): import numpy as np - from poli.objective_repository import DockstringProblemFactory, DockstringBlackBox + + from poli.objective_repository import DockstringBlackBox, DockstringProblemFactory # Creating the black box f = DockstringBlackBox(target_name="DRD2") @@ -156,7 +162,8 @@ def test_drd3_docking_example(): # that can't be handled by conda. We should skip this test for now. 
pytest.skip() import numpy as np - from poli.objective_repository import DRD3ProblemFactory, DRD3BlackBox + + from poli.objective_repository import DRD3BlackBox, DRD3ProblemFactory # Creating the black box f = DRD3BlackBox(string_representation="SMILES", force_isolation=True) @@ -182,8 +189,8 @@ def test_penalized_logp_lambo(): _ = pytest.importorskip("lambo") from poli.objective_repository import ( - PenalizedLogPLamboProblemFactory, PenalizedLogPLamboBlackBox, + PenalizedLogPLamboProblemFactory, ) # Creating the black box @@ -204,7 +211,8 @@ def test_penalized_logp_lambo(): def test_sa_tdc_example(): import numpy as np - from poli.objective_repository import SAProblemFactory, SABlackBox + + from poli.objective_repository import SABlackBox, SAProblemFactory # Creating the black box f = SABlackBox() @@ -231,8 +239,8 @@ def test_foldx_stability(): from pathlib import Path from poli.objective_repository import ( - FoldXStabilityProblemFactory, FoldXStabilityBlackBox, + FoldXStabilityProblemFactory, ) wildtype_pdb_path = ( @@ -260,7 +268,7 @@ def test_foldx_sasa(): from pathlib import Path - from poli.objective_repository import FoldXSASAProblemFactory, FoldXSASABlackBox + from poli.objective_repository import FoldXSASABlackBox, FoldXSASAProblemFactory wildtype_pdb_path = ( Path(__file__).parent.parent / "static_files_for_tests" / "101m_Repair.pdb" @@ -280,6 +288,7 @@ def test_foldx_sasa(): @pytest.mark.slow() def test_rasp_example(): from pathlib import Path + from poli.objective_repository import RaspBlackBox, RaspProblemFactory wildtype_pdb_path = ( diff --git a/src/poli/tests/observers/test_observers.py b/src/poli/tests/observers/test_observers.py index 8a13db51..44eb319c 100644 --- a/src/poli/tests/observers/test_observers.py +++ b/src/poli/tests/observers/test_observers.py @@ -8,17 +8,16 @@ them in isolated processes using `set_observer`. 
""" -from pathlib import Path import json import shutil +from pathlib import Path import numpy as np +from poli import objective_factory from poli.core.black_box_information import BlackBoxInformation -from poli.core.util.abstract_observer import AbstractObserver from poli.core.registry import register_observer - -from poli import objective_factory +from poli.core.util.abstract_observer import AbstractObserver THIS_DIR = Path(__file__).parent.resolve() diff --git a/src/poli/tests/parallelization/test_parallelization.py b/src/poli/tests/parallelization/test_parallelization.py index 86a22c70..e723a0e8 100644 --- a/src/poli/tests/parallelization/test_parallelization.py +++ b/src/poli/tests/parallelization/test_parallelization.py @@ -1,12 +1,11 @@ +from multiprocessing import cpu_count from pathlib import Path -import pytest import numpy as np +import pytest from poli import objective_factory -from multiprocessing import cpu_count - NUM_WORKERS = min(cpu_count(), 2) diff --git a/src/poli/tests/registry/basic_objectives/test_budget_exhaustion.py b/src/poli/tests/registry/basic_objectives/test_budget_exhaustion.py index 0cd9538e..8ee9d323 100644 --- a/src/poli/tests/registry/basic_objectives/test_budget_exhaustion.py +++ b/src/poli/tests/registry/basic_objectives/test_budget_exhaustion.py @@ -1,8 +1,7 @@ """Tests for the budget exhaustion inside objective functions.""" -import pytest - import numpy as np +import pytest from poli.core.exceptions import BudgetExhaustedException diff --git a/src/poli/tests/registry/chemistry/test_chemistry_objectives.py b/src/poli/tests/registry/chemistry/test_chemistry_objectives.py index 2361cf6a..725931b6 100644 --- a/src/poli/tests/registry/chemistry/test_chemistry_objectives.py +++ b/src/poli/tests/registry/chemistry/test_chemistry_objectives.py @@ -1,33 +1,33 @@ -import pytest from pathlib import Path import numpy as np +import pytest from poli import objective_factory from poli.objective_repository import ( - GSK3BetaBlackBox, - 
DRD2BlackBox, - JNK3BlackBox, - SABlackBox, - CelecoxibRediscoveryBlackBox, - ThiothixeneRediscoveryBlackBox, - TroglitazoneRediscoveryBlackBox, AlbuterolSimilarityBlackBox, - MestranolSimilarityBlackBox, AmlodipineMPOBlackBox, - FexofenadineMPOBlackBox, - OsimetrinibMPOBlackBox, - PerindoprilMPOBlackBox, - RanolazineMPOBlackBox, - SitagliptinMPOBlackBox, - ZaleplonMPOBlackBox, + CelecoxibRediscoveryBlackBox, DecoHopBlackBox, - ScaffoldHopBlackBox, + DRD2BlackBox, + FexofenadineMPOBlackBox, + GSK3BetaBlackBox, IsomerC7H8N2O2BlackBox, IsomerC9H10N2O2PF2ClBlackBox, + JNK3BlackBox, Median1BlackBox, Median2BlackBox, + MestranolSimilarityBlackBox, + OsimetrinibMPOBlackBox, + PerindoprilMPOBlackBox, + RanolazineMPOBlackBox, + SABlackBox, + ScaffoldHopBlackBox, + SitagliptinMPOBlackBox, + ThiothixeneRediscoveryBlackBox, + TroglitazoneRediscoveryBlackBox, ValsartanSMARTSBlackBox, + ZaleplonMPOBlackBox, ) THIS_DIR = Path(__file__).parent.resolve() diff --git a/src/poli/tests/registry/proteins/test_foldx.py b/src/poli/tests/registry/proteins/test_foldx.py index 51633d08..16d7b4ad 100644 --- a/src/poli/tests/registry/proteins/test_foldx.py +++ b/src/poli/tests/registry/proteins/test_foldx.py @@ -1,7 +1,7 @@ -import pytest from pathlib import Path import numpy as np +import pytest from poli import objective_factory diff --git a/src/poli/tests/registry/proteins/test_foldx_rfp_lambo.py b/src/poli/tests/registry/proteins/test_foldx_rfp_lambo.py index 8e5a8b22..a933c9d3 100644 --- a/src/poli/tests/registry/proteins/test_foldx_rfp_lambo.py +++ b/src/poli/tests/registry/proteins/test_foldx_rfp_lambo.py @@ -3,9 +3,10 @@ @pytest.mark.slow() def test_foldx_rfp_lambo_runs(): - from poli import create import numpy as np + from poli import create + # For now, we don't have automatic installation of lambo. # TODO: add automatic installation of lambo, and remove this # check. 
diff --git a/src/poli/tests/registry/proteins/test_rasp.py b/src/poli/tests/registry/proteins/test_rasp.py index cd5aada1..629df6b5 100644 --- a/src/poli/tests/registry/proteins/test_rasp.py +++ b/src/poli/tests/registry/proteins/test_rasp.py @@ -1,7 +1,7 @@ -import pytest from pathlib import Path import numpy as np +import pytest from poli import objective_factory diff --git a/src/poli/tests/registry/test_black_box_instancing.py b/src/poli/tests/registry/test_black_box_instancing.py index 9e05cece..422cddd8 100644 --- a/src/poli/tests/registry/test_black_box_instancing.py +++ b/src/poli/tests/registry/test_black_box_instancing.py @@ -1,7 +1,7 @@ from pathlib import Path -import pytest import numpy as np +import pytest from poli.objective_repository import ( AlohaBlackBox, @@ -9,14 +9,14 @@ DRD3BlackBox, FoldXRFPLamboBlackBox, FoldXSASABlackBox, - FoldXStabilityBlackBox, FoldXStabilityAndSASABlackBox, + FoldXStabilityBlackBox, GFPCBasBlackBox, GFPSelectionBlackBox, - PenalizedLogPLamboBlackBox, - RaspBlackBox, LogPBlackBox, + PenalizedLogPLamboBlackBox, QEDBlackBox, + RaspBlackBox, SABlackBox, SuperMarioBrosBlackBox, ToyContinuousBlackBox, diff --git a/src/poli/tests/registry/test_multi_objective_and_negative.py b/src/poli/tests/registry/test_multi_objective_and_negative.py index 6dde8c3c..ecff9d09 100644 --- a/src/poli/tests/registry/test_multi_objective_and_negative.py +++ b/src/poli/tests/registry/test_multi_objective_and_negative.py @@ -2,8 +2,8 @@ def test_multi_objective_instantiation(): - from poli.objective_repository import AlohaBlackBox from poli.core.multi_objective_black_box import MultiObjectiveBlackBox + from poli.objective_repository import AlohaBlackBox f_aloha = AlohaBlackBox() diff --git a/src/poli/tests/registry/test_passing_array_of_strings.py b/src/poli/tests/registry/test_passing_array_of_strings.py index 2e9b3719..bd4e0531 100644 --- a/src/poli/tests/registry/test_passing_array_of_strings.py +++ 
b/src/poli/tests/registry/test_passing_array_of_strings.py @@ -114,6 +114,7 @@ def test_passing_array_of_strings( to a black box is equivalent to passing an array of [b, L] tokens. """ import numpy as np + from poli import create problem = create( diff --git a/src/poli/tests/registry/toy_continuous_problems/test_instancing_of_toy_continuous_problems.py b/src/poli/tests/registry/toy_continuous_problems/test_instancing_of_toy_continuous_problems.py index 8a30b9f6..7d99e498 100644 --- a/src/poli/tests/registry/toy_continuous_problems/test_instancing_of_toy_continuous_problems.py +++ b/src/poli/tests/registry/toy_continuous_problems/test_instancing_of_toy_continuous_problems.py @@ -1,13 +1,12 @@ """This module tests the instancing of toy continuous problems.""" -import pytest - import numpy as np +import pytest from poli.objective_repository.toy_continuous_problem.register import ( POSSIBLE_FUNCTIONS, - TWO_DIMENSIONAL_PROBLEMS, SIX_DIMENSIONAL_PROBLEMS, + TWO_DIMENSIONAL_PROBLEMS, ) diff --git a/src/poli/tests/registry/toy_discrete_problems/test_ehrlich.py b/src/poli/tests/registry/toy_discrete_problems/test_ehrlich.py index f21415c0..7c5632dc 100644 --- a/src/poli/tests/registry/toy_discrete_problems/test_ehrlich.py +++ b/src/poli/tests/registry/toy_discrete_problems/test_ehrlich.py @@ -4,7 +4,6 @@ """ import numpy as np - import pytest from poli.objective_repository.ehrlich._construct_feasibility_matrix import ( diff --git a/src/poli/tests/static_files_for_tests/test_black_box_instancing.py b/src/poli/tests/static_files_for_tests/test_black_box_instancing.py index 7843f240..63578b52 100644 --- a/src/poli/tests/static_files_for_tests/test_black_box_instancing.py +++ b/src/poli/tests/static_files_for_tests/test_black_box_instancing.py @@ -1,7 +1,7 @@ from pathlib import Path -import pytest import numpy as np +import pytest from poli.objective_repository import ( AlohaBlackBox, @@ -9,14 +9,14 @@ DRD3BlackBox, FoldXRFPLamboBlackBox, FoldXSASABlackBox, - 
FoldXStabilityBlackBox, FoldXStabilityAndSASABlackBox, + FoldXStabilityBlackBox, GFPCBasBlackBox, GFPSelectionBlackBox, - PenalizedLogPLamboBlackBox, - RaspBlackBox, LogPBlackBox, + PenalizedLogPLamboBlackBox, QEDBlackBox, + RaspBlackBox, WhiteNoiseBlackBox, ) diff --git a/src/poli/tests/test_core_promises.py b/src/poli/tests/test_core_promises.py index c7375f21..db6c36bc 100644 --- a/src/poli/tests/test_core_promises.py +++ b/src/poli/tests/test_core_promises.py @@ -1,7 +1,7 @@ """This test suite contains the core promises we make to the user.""" -import pytest import numpy as np +import pytest def test_creating_an_instance_of_a_black_box(): diff --git a/src/poli/tests/test_minimal_working_example.py b/src/poli/tests/test_minimal_working_example.py index 66a54883..9ce0154a 100644 --- a/src/poli/tests/test_minimal_working_example.py +++ b/src/poli/tests/test_minimal_working_example.py @@ -3,6 +3,7 @@ def test_minimal_working_example_of_problem_creation(): Tests the minimal working example from the readme, verbatum. """ import numpy as np + from poli import objective_factory white_noise_problem = objective_factory.create(name="white_noise") @@ -15,6 +16,7 @@ def test_minimal_working_example_of_problem_creation(): def test_minimal_working_example_of_black_box_instancing(): import numpy as np + from poli.objective_repository import WhiteNoiseBlackBox f = WhiteNoiseBlackBox() diff --git a/src/poli/tests/util/test_foldx_interface.py b/src/poli/tests/util/test_foldx_interface.py index 3bb0b678..48e5f851 100644 --- a/src/poli/tests/util/test_foldx_interface.py +++ b/src/poli/tests/util/test_foldx_interface.py @@ -7,21 +7,17 @@ whole module if the import fails. """ -import pytest - from pathlib import Path +import pytest + try: - from poli.core.util.proteins.foldx import ( - FoldxInterface, - ) + from poli.core.util.proteins.foldx import FoldxInterface except (ImportError, FileNotFoundError): pytest.skip("Could not import the foldx interface. 
", allow_module_level=True) try: - from poli.core.util.proteins.pdb_parsing import ( - parse_pdb_as_residue_strings, - ) + from poli.core.util.proteins.pdb_parsing import parse_pdb_as_residue_strings except ImportError: pytest.skip( "Could not import the protein utilities for parsing. ", allow_module_level=True diff --git a/src/poli/tests/util/test_protein_utilities.py b/src/poli/tests/util/test_protein_utilities.py index 06a2df2c..fca0e4c7 100644 --- a/src/poli/tests/util/test_protein_utilities.py +++ b/src/poli/tests/util/test_protein_utilities.py @@ -7,9 +7,10 @@ whole module if the import fails. """ -import pytest from pathlib import Path +import pytest + try: from poli.core.util.proteins.pdb_parsing import ( parse_pdb_as_residue_strings, @@ -23,8 +24,8 @@ try: from poli.core.util.proteins.mutations import ( edits_between_strings, - mutations_from_wildtype_residues_and_mutant, find_closest_wildtype_pdb_file_to_mutant, + mutations_from_wildtype_residues_and_mutant, ) except ImportError: pytest.skip( From 469f3b61f89fce2d5dd47a6213d74d6cb5fd999e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Gonz=C3=A1lez=20Duque?= Date: Thu, 11 Jul 2024 16:59:01 +0200 Subject: [PATCH 06/10] Clean tests and divide them by environment for CI (#217) * Shortens benchmark tests, removes example tests, add attach test for observers * Cleans several tests and adds unmarked flags for quick testing * Splits tests according to their environments in CI * Updates contributing * Updates conftest to include parametrized unmarked tests * Updates linting to include isort * Updates isort checking to only handle src --- ...-conda.yml => python-tox-testing-base.yml} | 6 +- .../python-tox-testing-dockstring-env.yml | 26 ++ .../python-tox-testing-lambo-env.yml | 26 ++ .../python-tox-testing-protein-env.yml | 26 ++ .../workflows/python-tox-testing-tdc-env.yml | 26 ++ CONTRIBUTING.md | 49 ++- pyproject.toml | 6 + src/poli/core/black_box_information.py | 8 +- 
.../benchmarks/test_benchmark_creation.py | 16 +- src/poli/tests/conftest.py | 11 + src/poli/tests/docs_examples/__init__.py | 0 .../tests/docs_examples/alphabet_selfies.json | 4 - .../docs_examples/test_objective_functions.py | 330 ------------------ src/poli/tests/observers/test_observers.py | 20 +- .../parallelization/test_parallelization.py | 5 +- .../basic_objectives/test_basic_objectives.py | 4 - .../test_budget_exhaustion.py | 10 - .../chemistry/test_chemistry_objectives.py | 26 +- .../tests/registry/proteins/test_foldx.py | 131 +------ .../registry/proteins/test_foldx_rfp_lambo.py | 5 +- src/poli/tests/registry/proteins/test_rasp.py | 2 + .../registry/test_black_box_instancing.py | 219 ------------ .../tests/registry/test_force_isolation.py | 4 - .../test_instancing_black_boxes_alone.py | 11 - .../registry/test_passing_array_of_strings.py | 19 - ...est_embedding_problems_into_higher_dims.py | 4 - src/poli/tests/test_core_promises.py | 16 - .../tests/test_minimal_working_example.py | 24 -- src/poli/tests/test_seeding.py | 4 - src/poli/tests/util/test_foldx_interface.py | 1 + src/poli/tests/util/test_protein_utilities.py | 1 + tox.ini | 99 ++++-- 32 files changed, 291 insertions(+), 848 deletions(-) rename .github/workflows/{python-tox-testing-including-conda.yml => python-tox-testing-base.yml} (75%) create mode 100644 .github/workflows/python-tox-testing-dockstring-env.yml create mode 100644 .github/workflows/python-tox-testing-lambo-env.yml create mode 100644 .github/workflows/python-tox-testing-protein-env.yml create mode 100644 .github/workflows/python-tox-testing-tdc-env.yml create mode 100644 src/poli/tests/conftest.py delete mode 100644 src/poli/tests/docs_examples/__init__.py delete mode 100644 src/poli/tests/docs_examples/alphabet_selfies.json delete mode 100644 src/poli/tests/docs_examples/test_objective_functions.py delete mode 100644 src/poli/tests/registry/test_black_box_instancing.py delete mode 100644 
src/poli/tests/registry/test_instancing_black_boxes_alone.py delete mode 100644 src/poli/tests/test_minimal_working_example.py diff --git a/.github/workflows/python-tox-testing-including-conda.yml b/.github/workflows/python-tox-testing-base.yml similarity index 75% rename from .github/workflows/python-tox-testing-including-conda.yml rename to .github/workflows/python-tox-testing-base.yml index 85e35766..5c4ec870 100644 --- a/.github/workflows/python-tox-testing-including-conda.yml +++ b/.github/workflows/python-tox-testing-base.yml @@ -1,4 +1,4 @@ -name: Tests on poli (dev, conda, python 3.9) +name: poli base (dev, conda, python 3.9) on: [push] @@ -24,6 +24,6 @@ jobs: - name: Test linting with tox run: | tox -c tox.ini -e lint - - name: Test poli-base with tox (ignoring RaSP) + - name: Test basic behaviors of poli with tox and pytest run: | - tox -c tox.ini -e poli-base-py39 -- --ignore=src/poli/tests/registry/proteins/test_rasp.py + tox -c tox.ini -e poli-base-py39 diff --git a/.github/workflows/python-tox-testing-dockstring-env.yml b/.github/workflows/python-tox-testing-dockstring-env.yml new file mode 100644 index 00000000..3f40c42b --- /dev/null +++ b/.github/workflows/python-tox-testing-dockstring-env.yml @@ -0,0 +1,26 @@ +name: poli dockstring (dev, conda, python 3.9) + +on: [push] + +jobs: + build-linux: + runs-on: ubuntu-latest + strategy: + max-parallel: 5 + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.9 + uses: actions/setup-python@v3 + with: + python-version: '3.9' + - name: Add conda to system path + run: | + # $CONDA is an environment variable pointing to the root of the miniconda directory + echo $CONDA/bin >> $GITHUB_PATH + - name: Install dependencies + run: | + python -m pip install tox + - name: Test dockstring-related black boxes with tox and pytest + run: | + tox -c tox.ini -e poli-dockstring-py39 diff --git a/.github/workflows/python-tox-testing-lambo-env.yml b/.github/workflows/python-tox-testing-lambo-env.yml new file mode 
100644 index 00000000..ed15510f --- /dev/null +++ b/.github/workflows/python-tox-testing-lambo-env.yml @@ -0,0 +1,26 @@ +name: poli lambo (dev, conda, python 3.9) + +on: [push] + +jobs: + build-linux: + runs-on: ubuntu-latest + strategy: + max-parallel: 5 + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.9 + uses: actions/setup-python@v3 + with: + python-version: '3.9' + - name: Add conda to system path + run: | + # $CONDA is an environment variable pointing to the root of the miniconda directory + echo $CONDA/bin >> $GITHUB_PATH + - name: Install dependencies + run: | + python -m pip install tox + - name: Test lambo-related black boxes with tox and pytest + run: | + tox -c tox.ini -e poli-lambo-py39 diff --git a/.github/workflows/python-tox-testing-protein-env.yml b/.github/workflows/python-tox-testing-protein-env.yml new file mode 100644 index 00000000..453a3f88 --- /dev/null +++ b/.github/workflows/python-tox-testing-protein-env.yml @@ -0,0 +1,26 @@ +name: poli protein (dev, conda, python 3.9) + +on: [push] + +jobs: + build-linux: + runs-on: ubuntu-latest + strategy: + max-parallel: 5 + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.9 + uses: actions/setup-python@v3 + with: + python-version: '3.9' + - name: Add conda to system path + run: | + # $CONDA is an environment variable pointing to the root of the miniconda directory + echo $CONDA/bin >> $GITHUB_PATH + - name: Install dependencies + run: | + python -m pip install tox + - name: Test protein-related black boxes with tox and pytest + run: | + tox -c tox.ini -e poli-protein-py39 diff --git a/.github/workflows/python-tox-testing-tdc-env.yml b/.github/workflows/python-tox-testing-tdc-env.yml new file mode 100644 index 00000000..b6d1c803 --- /dev/null +++ b/.github/workflows/python-tox-testing-tdc-env.yml @@ -0,0 +1,26 @@ +name: poli tdc (dev, conda, python 3.9) + +on: [push] + +jobs: + build-linux: + runs-on: ubuntu-latest + strategy: + max-parallel: 5 + + steps: + - uses: 
actions/checkout@v3 + - name: Set up Python 3.9 + uses: actions/setup-python@v3 + with: + python-version: '3.9' + - name: Add conda to system path + run: | + # $CONDA is an environment variable pointing to the root of the miniconda directory + echo $CONDA/bin >> $GITHUB_PATH + - name: Install dependencies + run: | + python -m pip install tox + - name: Test PyTDC-related black boxes with tox and pytest + run: | + tox -c tox.ini -e poli-tdc-py39 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 6327afe6..53e44344 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -3,7 +3,7 @@ ![Linting: black](https://img.shields.io/badge/Linting-black-black) ![Testing: pytest](https://img.shields.io/badge/Testing-pytest-blue) ![Testing: tox](https://img.shields.io/badge/Testing-tox-blue) -![Main branch: black](https://img.shields.io/badge/Pull_request_to-dev-blue) +![Main branch: dev](https://img.shields.io/badge/Pull_request_to-dev-blue) This note details how to contribute to `poli`. @@ -11,20 +11,41 @@ This note details how to contribute to `poli`. The main development branch is called `dev`. To contribute, we recommend creating a fork of this repository and making changes on your version. Once you are ready to contribute, we expect you to document, lint and test. -## Documentation standards +## Installing dev dependencies and pre-commit hooks -We follow [numpy's documentation standards](https://numpydoc.readthedocs.io/en/latest/format.html). +We recommend you create a `poli-dev` environment in conda -## Linting your changes +```bash +conda create -n poli-dev python=3.10 +conda activate poli-dev +pip install -r requirements.txt +pip install -r requirements-dev.txt +``` -We expect you to lint the code you write or modify using `black`. +The dev requirements include `pre-commit`. 
Install the hooks in our config by running ```bash -pip install black -black ./path/to/files +pre-commit install ``` -## Testing your changes for `dev`` +Now every commit will run linting and isorting for you. You can also run it manually by saying + +```bash +pre-commit run --all-files +``` + +## Documentation standards + +We follow [numpy's documentation standards](https://numpydoc.readthedocs.io/en/latest/format.html). + +## Creating a new environment? Mark your tests + +If you're contributing a black box in a new environment, remember to + +1. Mark all your tests with `@pytest.mark.poli__your_env`. +2. Add a description of your marker to the `[tool.pytest.ini_options]` section of `pyproject.toml`. + +## Testing your changes for `dev` Since we are testing multiple conda environments, we settled for using a combination of `tox` and `pytest`. @@ -32,19 +53,15 @@ Since we are testing multiple conda environments, we settled for using a combina pip install tox # To test linting (from the root of the project) -tox -c tox.dev.ini -e lint +tox -c tox.ini -e lint # To test in the base environment for poli -tox -c tox.dev.ini -e poli-base-py39 +tox -c tox.ini -e poli-base-py39 ``` -If you want to run tests in all environments, remove `-e poli-base-py39` and just run `tox`. - -## More thorough testing - -In many cases, testing with the instructions above should be enough. However, since we are dealing with creating conda environments, the definite test comes by building the Docker image specified in `Dockerfile.test`, and running it. +There are several different environments (depending on the black boxes we test). Check `tox.ini` for more details. -When contributing to the `@master` branch (i.e. to release), we will run these tests. +If you want to run tests in all environments, remove `-e poli-base-py39` and just run `tox`. This might take a while, and several conda envs will be created. 
## Create a pull request to dev diff --git a/pyproject.toml b/pyproject.toml index 46b1fd4e..ded40ea2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,12 @@ Homepage = "https://github.com/MachineLearningLifeScience/poli" [tool.pytest.ini_options] markers = [ "slow: marks tests as slow (deselect with '-m \"not slow\"')", + "poli__lambo: marks tests that run in the poli__lambo environment", + "poli__dockstring: marks tests that run in the poli__dockstring environment", + "poli__tdc: marks tests that run in the poli__tdc environment", + "poli__protein: marks tests that run in the poli__protein environment", + "poli__rasp: marks tests that run in the poli__rasp environment", + "unmarked: All other tests, which usually run in the base environment", ] [tool.isort] diff --git a/src/poli/core/black_box_information.py b/src/poli/core/black_box_information.py index 325a56e8..93de034e 100644 --- a/src/poli/core/black_box_information.py +++ b/src/poli/core/black_box_information.py @@ -14,6 +14,8 @@ from typing import Literal, Union +import numpy as np + class BlackBoxInformation: def __init__( @@ -145,7 +147,11 @@ def __repr__(self): def as_dict(self): return { "name": self.name, - "max_sequence_length": self.max_sequence_length, + "max_sequence_length": ( + self.max_sequence_length + if self.max_sequence_length != np.inf + else "inf" + ), "aligned": self.aligned, "fixed_length": self.fixed_length, "deterministic": self.deterministic, diff --git a/src/poli/tests/benchmarks/test_benchmark_creation.py b/src/poli/tests/benchmarks/test_benchmark_creation.py index 00f45de9..5f10d78b 100644 --- a/src/poli/tests/benchmarks/test_benchmark_creation.py +++ b/src/poli/tests/benchmarks/test_benchmark_creation.py @@ -7,6 +7,8 @@ tests are already covered by the tests in objective_repository. 
""" +import pytest + def test_creating_toy_continuous_functions_benchmark(): from poli.benchmarks import ToyContinuousFunctionsBenchmark @@ -27,6 +29,7 @@ def test_creating_embedded_toy_continuous_functions_benchmark(): f(x0) +@pytest.mark.poli__tdc def test_creating_guacamol_benchmark(): from poli.benchmarks import GuacaMolGoalDirectedBenchmark @@ -35,7 +38,12 @@ def test_creating_guacamol_benchmark(): for problem in benchmark: f, x0 = problem.black_box, problem.x0 + # Break after the first iteration + # for CI efficiency + break + +@pytest.mark.poli__tdc def test_creating_pmo_benchmark(): from poli.benchmarks import PMOBenchmark @@ -44,6 +52,8 @@ def test_creating_pmo_benchmark(): for problem in benchmark: f, x0 = problem.black_box, problem.x0 - -if __name__ == "__main__": - test_creating_embedded_toy_continuous_functions_benchmark() + # Break after the first iteration + # for CI efficiency. The creation of all + # these black boxes is already being tested + # in the chemistry test suite. 
+ break diff --git a/src/poli/tests/conftest.py b/src/poli/tests/conftest.py new file mode 100644 index 00000000..e89b1db1 --- /dev/null +++ b/src/poli/tests/conftest.py @@ -0,0 +1,11 @@ +def pytest_collection_modifyitems(items, config): + for item in items: + markers = item.iter_markers() + is_unmarked = True + for marker in markers: + if marker.name != "parametrize": + is_unmarked = False + break + + if is_unmarked: + item.add_marker("unmarked") diff --git a/src/poli/tests/docs_examples/__init__.py b/src/poli/tests/docs_examples/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/poli/tests/docs_examples/alphabet_selfies.json b/src/poli/tests/docs_examples/alphabet_selfies.json deleted file mode 100644 index e744a5d6..00000000 --- a/src/poli/tests/docs_examples/alphabet_selfies.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "": 0, - "[C]": 1 -} \ No newline at end of file diff --git a/src/poli/tests/docs_examples/test_objective_functions.py b/src/poli/tests/docs_examples/test_objective_functions.py deleted file mode 100644 index e9832522..00000000 --- a/src/poli/tests/docs_examples/test_objective_functions.py +++ /dev/null @@ -1,330 +0,0 @@ -""" -This test module contains all the different instructions -on how to run the different objective functions that are -available poli's objective repository. -""" - -import pytest - - -def test_white_noise_example(): - import numpy as np - - from poli.objective_repository import WhiteNoiseBlackBox, WhiteNoiseProblemFactory - - # Creating the black box - f = WhiteNoiseBlackBox() - - # Creating a problem - problem = WhiteNoiseProblemFactory().create() - f, x0 = problem.black_box, problem.x0 - - # Example input: - x = np.array([["1", "2", "3"]]) # must be of shape [b, L], in this case [1, 3]. 
- - # Querying: - print(f(x)) - - -def test_aloha_example(): - import numpy as np - - from poli.objective_repository import AlohaBlackBox, AlohaProblemFactory - - # Creating the black box - f = AlohaBlackBox() - - # Creating a problem - problem = AlohaProblemFactory().create() - f, x0 = problem.black_box, problem.x0 - - # Example input: - x = np.array( - [["A", "L", "O", "O", "F"]] - ) # must be of shape [b, L], in this case [1, 5]. - - # Querying: - print(f(x)) # Should be 3 (A, L, and the first O). - - # Querying: - y = f(x) - print(y) # Should be 3 (A, L, and the first O). - assert np.isclose(y, 3).all() - - -def test_toy_continuous_example(): - import numpy as np - - from poli.objective_repository import ( - ToyContinuousBlackBox, - ToyContinuousProblemFactory, - ) - - function_name = "ackley_function_01" - n_dimensions = 2 - - # Creating the black box - f = ToyContinuousBlackBox( - function_name=function_name, - n_dimensions=n_dimensions, - ) - - # Creating a problem - problem = ToyContinuousProblemFactory().create( - function_name=function_name, - n_dimensions=n_dimensions, - ) - f, x0 = problem.black_box, problem.x0 - - # Example input: - x = np.array([[0.5, 0.5]]) # must be of shape [b, L], in this case [1, 2]. 
- - # Querying: - print(f(x)) - - problem = ToyContinuousProblemFactory().create( - function_name="camelback_2d", - embed_in=30, # This will create a function that takes 30d input values - ) - - -def test_qed_example(): - import numpy as np - - from poli.objective_repository import QEDBlackBox, QEDProblemFactory - - # Creating the black box - f = QEDBlackBox(string_representation="SELFIES") - - # Creating a problem - problem = QEDProblemFactory().create(string_representation="SELFIES") - f, x0 = problem.black_box, problem.x0 - - # Example input: a single carbon - x = np.array([["[C]"]]) - - # Querying: - y = f(x) - print(y) # Should be close to 0.35978 - assert np.isclose(y, 0.35978494).all() - - -def test_logp_example(): - import numpy as np - - from poli.objective_repository import LogPBlackBox, LogPProblemFactory - - # Creating the black box - f = LogPBlackBox(string_representation="SMILES") - - # Creating a problem - problem = LogPProblemFactory().create(string_representation="SMILES") - f, x0 = problem.black_box, problem.x0 - - # Example input: a single carbon - x = np.array(["C"]).reshape(1, -1) - - # Querying: - y = f(x) - print(y) # Should be close to 0.6361 - assert np.isclose(y, 0.6361).all() - - -def test_dockstring_example(): - import numpy as np - - from poli.objective_repository import DockstringBlackBox, DockstringProblemFactory - - # Creating the black box - f = DockstringBlackBox(target_name="DRD2") - - # Creating a problem - problem = DockstringProblemFactory().create(target_name="DRD2") - f, x0 = problem.black_box, problem.x0 - - # Example input: risperidone - x = np.array(["CC1=C(C(=O)N2CCCCC2=N1)CCN3CCC(CC3)C4=NOC5=C4C=CC(=C5)F"]) - - # Querying: - y = f(x) - print(y) # Should be 11.9 - - # As of 25/06/2024, the value changed from 11.9 to 11.8. - # Several potential culprits here: RDKit being modified - # to accomodate for numpy 2.0, or maybe OpenBabel... - - # An issue will be raised on DockString's repository. 
- assert np.isclose(y, 11.9, atol=1e-1).all() - - -def test_drd3_docking_example(): - # TODO: for this one, we need autodock vina and other stronger dependencies - # that can't be handled by conda. We should skip this test for now. - pytest.skip() - import numpy as np - - from poli.objective_repository import DRD3BlackBox, DRD3ProblemFactory - - # Creating the black box - f = DRD3BlackBox(string_representation="SMILES", force_isolation=True) - - # Creating a problem - problem = DRD3ProblemFactory().create( - string_representation="SMILES", force_isolation=True - ) - f, x0 = problem.black_box, problem.x0 - - # Example input: - x = np.array(["c1ccccc1"]) - - # Querying: - y = f(x) - print(y) # Should be close to -4.1 - assert np.isclose(y, -4.1).all() - - -def test_penalized_logp_lambo(): - import numpy as np - - _ = pytest.importorskip("lambo") - - from poli.objective_repository import ( - PenalizedLogPLamboBlackBox, - PenalizedLogPLamboProblemFactory, - ) - - # Creating the black box - f = PenalizedLogPLamboBlackBox() - - # Creating a problem - problem = PenalizedLogPLamboProblemFactory().create() - f, x0 = problem.black_box, problem.x0 - - # Example input: a single carbon - x = np.array(["C"]).reshape(1, -1) - - # Querying: - y = f(x) - print(y) # Should be close to 0.6361 - assert np.isclose(y, -6.22381305).all() - - -def test_sa_tdc_example(): - import numpy as np - - from poli.objective_repository import SABlackBox, SAProblemFactory - - # Creating the black box - f = SABlackBox() - - # Creating a problem - problem = SAProblemFactory().create() - f, x0 = problem.black_box, problem.x0 - - # Example input: (taken from the TDC) - x = np.array(["CCNC(=O)c1ccc(NC(=O)N2CC[C@H](C)[C@H](O)C2)c(C)c1"]) - - # Querying: - y = f(x) - print(y) # Should be close to 2.85483733 - assert np.isclose(y, 2.85483733).all() - - -def test_foldx_stability(): - from pathlib import Path - - if not (Path.home() / "foldx" / "foldx").exists(): - pytest.skip("FoldX not installed") - - from 
pathlib import Path - - from poli.objective_repository import ( - FoldXStabilityBlackBox, - FoldXStabilityProblemFactory, - ) - - wildtype_pdb_path = ( - Path(__file__).parent.parent / "static_files_for_tests" / "101m_Repair.pdb" - ) - - # Creating the black box - f = FoldXStabilityBlackBox(wildtype_pdb_path=[wildtype_pdb_path]) - - # Creating a problem - problem = FoldXStabilityProblemFactory().create( - wildtype_pdb_path=[wildtype_pdb_path] - ) - f, x0 = problem.black_box, problem.x0 - - # Example evaluation: evaluating without mutations - print(f(x0)) - - -def test_foldx_sasa(): - from pathlib import Path - - if not (Path.home() / "foldx" / "foldx").exists(): - pytest.skip("FoldX not installed") - - from pathlib import Path - - from poli.objective_repository import FoldXSASABlackBox, FoldXSASAProblemFactory - - wildtype_pdb_path = ( - Path(__file__).parent.parent / "static_files_for_tests" / "101m_Repair.pdb" - ) - - # Creating the black box - f = FoldXSASABlackBox(wildtype_pdb_path=[wildtype_pdb_path]) - - # Creating a problem - problem = FoldXSASAProblemFactory().create(wildtype_pdb_path=[wildtype_pdb_path]) - f, x0 = problem.black_box, problem.x0 - - # Example evaluation: evaluating without mutations - print(f(x0)) - - -@pytest.mark.slow() -def test_rasp_example(): - from pathlib import Path - - from poli.objective_repository import RaspBlackBox, RaspProblemFactory - - wildtype_pdb_path = ( - Path(__file__).parent.parent / "static_files_for_tests" / "3ned.pdb" - ) - - # Creating the black box - f = RaspBlackBox(wildtype_pdb_path=[wildtype_pdb_path]) - - # Creating a problem - problem = RaspProblemFactory().create(wildtype_pdb_path=[wildtype_pdb_path]) - f, x0 = problem.black_box, problem.x0 - - # Querying: - print(f(x0)) - - -def test_smb_example(): - pytest.skip("We need to check for a virtual frame buffer.") - # TODO: the user has to have a screen (or virtual frame - # buffer) to run this. How can we account for this? 
- from poli.objective_repository import ( - SuperMarioBrosBlackBox, - SuperMarioBrosProblemFactory, - ) - - # Creating the black box - f = SuperMarioBrosBlackBox() - - # Creating a problem - problem = SuperMarioBrosProblemFactory().create(visualize=True) - f, x0 = problem.black_box, problem.x0 - - # Querying: - print(f(x0)) - - -if __name__ == "__main__": - test_smb_example() diff --git a/src/poli/tests/observers/test_observers.py b/src/poli/tests/observers/test_observers.py index 44eb319c..12c5995f 100644 --- a/src/poli/tests/observers/test_observers.py +++ b/src/poli/tests/observers/test_observers.py @@ -9,7 +9,6 @@ """ import json -import shutil from pathlib import Path import numpy as np @@ -184,5 +183,20 @@ def test_multiple_observer_registration(): problem_2.observer._observer.finish() -if __name__ == "__main__": - test_observer_registration_and_external_instancing() +def test_attaching_an_observer_to_a_black_box(): + from poli.repository import ToyContinuousBlackBox + + f = ToyContinuousBlackBox( + function_name="ackley_function_01", + n_dimensions=10, + ) + + observer = SimpleObserver() + + f.set_observer(observer) + + observer.initialize_observer(f.info, {"experiment_id": "attaching"}, seed=0) + + f(np.array([0.0] * 10).reshape(1, 10)) + + assert len(observer.results[0]["x"]) == 1 diff --git a/src/poli/tests/parallelization/test_parallelization.py b/src/poli/tests/parallelization/test_parallelization.py index e723a0e8..14280c79 100644 --- a/src/poli/tests/parallelization/test_parallelization.py +++ b/src/poli/tests/parallelization/test_parallelization.py @@ -51,6 +51,7 @@ def test_parallelization_in_qed(): assert np.isclose(y1, np.array([[0.35978494], [0.37278556], [0.38547066]])).all() +@pytest.mark.poli__protein def test_parallelization_in_foldx_stability_and_sasa(): HOME_DIR = Path().home().resolve() PATH_TO_FOLDX_FILES = HOME_DIR / "foldx" @@ -72,7 +73,3 @@ def test_parallelization_in_foldx_stability_and_sasa(): f, x0 = problem.black_box, problem.x0 
f(x0) - - -if __name__ == "__main__": - test_parallelization_in_foldx_stability_and_sasa() diff --git a/src/poli/tests/registry/basic_objectives/test_basic_objectives.py b/src/poli/tests/registry/basic_objectives/test_basic_objectives.py index 2dd20d2c..423fb5bd 100644 --- a/src/poli/tests/registry/basic_objectives/test_basic_objectives.py +++ b/src/poli/tests/registry/basic_objectives/test_basic_objectives.py @@ -1,11 +1,7 @@ -from pathlib import Path - import numpy as np from poli import objective_factory -THIS_DIR = Path(__file__).parent.resolve() - def test_registering_white_noise(): white_noise_problem = objective_factory.create(name="white_noise") diff --git a/src/poli/tests/registry/basic_objectives/test_budget_exhaustion.py b/src/poli/tests/registry/basic_objectives/test_budget_exhaustion.py index 8ee9d323..c4f4c612 100644 --- a/src/poli/tests/registry/basic_objectives/test_budget_exhaustion.py +++ b/src/poli/tests/registry/basic_objectives/test_budget_exhaustion.py @@ -6,12 +6,6 @@ from poli.core.exceptions import BudgetExhaustedException -def test_budget_exhaustion_exception(): - """Test that the exception is raised when the budget is exhausted.""" - with pytest.raises(BudgetExhaustedException): - raise BudgetExhaustedException() - - def test_num_evaluation_tracks_correctly(): from poli import objective_factory @@ -51,7 +45,3 @@ def test_budget_exhausts(): with pytest.raises(BudgetExhaustedException): f(x0) - - -if __name__ == "__main__": - test_budget_exhausts() diff --git a/src/poli/tests/registry/chemistry/test_chemistry_objectives.py b/src/poli/tests/registry/chemistry/test_chemistry_objectives.py index 725931b6..6709ab1b 100644 --- a/src/poli/tests/registry/chemistry/test_chemistry_objectives.py +++ b/src/poli/tests/registry/chemistry/test_chemistry_objectives.py @@ -67,24 +67,7 @@ def test_force_registering_qed_with_context_manager(): assert np.isclose(y, 0.35978494).all() -def test_force_registering_logp(): - """ - We test whether we can 
force-register the logp problem - if rdkit and selfies are not installed. - """ - problem = objective_factory.create( - name="rdkit_logp", - force_register=True, - ) - f, x0 = problem.black_box, problem.x0 - y0 = f(x0) - - # Asserting that a single carbon atom has logp close - # to 0.6361. (according to RDKit) - assert np.isclose(y0, 0.6361).all() - f.terminate() - - +@pytest.mark.poli__lambo def test_penalized_logp_lambo(): """ Testing whether we can register the logp problem @@ -98,6 +81,7 @@ def test_penalized_logp_lambo(): problem = objective_factory.create(name="penalized_logp_lambo", force_register=True) +@pytest.mark.poli__dockstring def test_querying_dockstring_using_smiles(): """ In this test, we force-register and query dockstring. @@ -119,6 +103,7 @@ def test_querying_dockstring_using_smiles(): f.terminate() +@pytest.mark.poli__dockstring def test_querying_dockstring_using_selfies(): """ In this test, we check whether dockstring still @@ -303,6 +288,7 @@ def test_querying_dockstring_using_selfies(): ] +@pytest.mark.poli__tdc @pytest.mark.parametrize( "black_box_name, black_box_class, kwargs_for_black_box, value_to_check", test_data_for_pmo, @@ -328,7 +314,3 @@ def test_pmo_black_boxes( assert np.allclose(y0_, y0) if value_to_check is not None: assert (y0_ == value_to_check).all() - - -if __name__ == "__main__": - test_pmo_black_boxes(*test_data_for_pmo[-1]) diff --git a/src/poli/tests/registry/proteins/test_foldx.py b/src/poli/tests/registry/proteins/test_foldx.py index 16d7b4ad..8672d202 100644 --- a/src/poli/tests/registry/proteins/test_foldx.py +++ b/src/poli/tests/registry/proteins/test_foldx.py @@ -16,99 +16,12 @@ pytest.skip("FoldX is not compiled. ", allow_module_level=True) -def test_foldx_stability_is_available(): - """ - We test whether foldx_stability is available when - 1. foldx is installed. - 2. foldx files are in the right position - 2. 
biopython and python-levenshtein are installed - """ - HOME_DIR = Path().home().resolve() - PATH_TO_FOLDX_FILES = HOME_DIR / "foldx" - if not PATH_TO_FOLDX_FILES.exists(): - pytest.skip("FoldX is not installed. ") - - if not (PATH_TO_FOLDX_FILES / "foldx").exists(): - pytest.skip("FoldX is not compiled. ") - - _ = pytest.importorskip("Bio") - _ = pytest.importorskip("Levenshtein") - _ = pytest.importorskip("pdbtools") - - from poli.objective_repository import AVAILABLE_PROBLEM_FACTORIES - - assert "foldx_stability" in AVAILABLE_PROBLEM_FACTORIES - - -def test_force_registering_foldx_stability(): - """ - We test whether we can force-register the foldx_stability - problem if foldx is installed. - """ - HOME_DIR = Path().home().resolve() - PATH_TO_FOLDX_FILES = HOME_DIR / "foldx" - if not PATH_TO_FOLDX_FILES.exists(): - pytest.skip("FoldX is not installed. ") - - if not (PATH_TO_FOLDX_FILES / "foldx").exists(): - pytest.skip("FoldX is not compiled. ") - - problem = objective_factory.create( - name="foldx_stability", - wildtype_pdb_path=THIS_DIR / "101m_Repair.pdb", - force_register=True, - ) - f, x0 = problem.black_box, problem.x0 - y0 = f(x0) - - assert np.isclose(y0, 32.4896).all() - f.terminate() - - -def test_force_registering_foldx_sasa(): - """ - We test whether we can force-register the foldx_sasa - problem if foldx is installed. - """ - HOME_DIR = Path().home().resolve() - PATH_TO_FOLDX_FILES = HOME_DIR / "foldx" - if not PATH_TO_FOLDX_FILES.exists(): - pytest.skip("FoldX is not installed. ") - - if not (PATH_TO_FOLDX_FILES / "foldx").exists(): - pytest.skip("FoldX is not compiled. 
") - - problem = objective_factory.create( - name="foldx_sasa", - wildtype_pdb_path=THIS_DIR / "101m_Repair.pdb", - force_register=True, - ) - f, x0 = problem.black_box, problem.x0 - y0 = f(x0) - - assert np.isclose(y0, 8411.45578009).all() - f.terminate() - - -def test_registering_foldx_stability(): +@pytest.mark.poli__protein +def test_running_foldx_stability(): """ Testing whether we can register the logp problem if biopython and python-levenshtein are installed. """ - HOME_DIR = Path().home().resolve() - PATH_TO_FOLDX_FILES = HOME_DIR / "foldx" - if not PATH_TO_FOLDX_FILES.exists(): - pytest.skip("FoldX is not installed. ") - - if not (PATH_TO_FOLDX_FILES / "foldx").exists(): - pytest.skip("FoldX is not compiled. ") - - if not (THIS_DIR / "101m_Repair.pdb").exists(): - pytest.skip("Could not find wildtype 101m_Repair.pdb in test folder.") - - _ = pytest.importorskip("Bio") - _ = pytest.importorskip("Levenshtein") - problem = objective_factory.create( name="foldx_stability", wildtype_pdb_path=THIS_DIR / "101m_Repair.pdb", @@ -119,22 +32,12 @@ def test_registering_foldx_stability(): assert np.isclose(y0, 32.4896).all() -def test_registering_foldx_sasa(): +@pytest.mark.poli__protein +def test_running_foldx_sasa(): """ Testing whether we can register the logp problem if biopython and python-levenshtein are installed. """ - HOME_DIR = Path().home().resolve() - PATH_TO_FOLDX_FILES = HOME_DIR / "foldx" - if not PATH_TO_FOLDX_FILES.exists(): - pytest.skip("FoldX is not installed. ") - - if not (PATH_TO_FOLDX_FILES / "foldx").exists(): - pytest.skip("FoldX is not compiled. 
") - - _ = pytest.importorskip("Bio") - _ = pytest.importorskip("Levenshtein") - problem = objective_factory.create( name="foldx_sasa", wildtype_pdb_path=THIS_DIR / "101m_Repair.pdb", @@ -145,21 +48,12 @@ def test_registering_foldx_sasa(): assert np.isclose(y0, 8411.45578009).all() -def test_registering_foldx_stability_and_sasa(): +@pytest.mark.poli__protein +def test_running_foldx_stability_and_sasa(): """ Testing whether we can register the logp problem if biopython and python-levenshtein are installed. """ - _ = pytest.importorskip("Bio") - _ = pytest.importorskip("Levenshtein") - HOME_DIR = Path().home().resolve() - PATH_TO_FOLDX_FILES = HOME_DIR / "foldx" - if not PATH_TO_FOLDX_FILES.exists(): - pytest.skip("FoldX is not installed. ") - - if not (PATH_TO_FOLDX_FILES / "foldx").exists(): - pytest.skip("FoldX is not compiled. ") - problem = objective_factory.create( name="foldx_stability_and_sasa", wildtype_pdb_path=THIS_DIR / "101m_Repair.pdb", @@ -171,20 +65,11 @@ def test_registering_foldx_stability_and_sasa(): assert np.isclose(y0[:, 1], 8411.45578009).all() +@pytest.mark.poli__protein def test_registering_foldx_stability_and_sasa_with_verbose_output(): """ Testing whether the foldx output is printed. """ - _ = pytest.importorskip("Bio") - _ = pytest.importorskip("Levenshtein") - HOME_DIR = Path().home().resolve() - PATH_TO_FOLDX_FILES = HOME_DIR / "foldx" - if not PATH_TO_FOLDX_FILES.exists(): - pytest.skip("FoldX is not installed. ") - - if not (PATH_TO_FOLDX_FILES / "foldx").exists(): - pytest.skip("FoldX is not compiled. 
") - problem = objective_factory.create( name="foldx_stability_and_sasa", wildtype_pdb_path=THIS_DIR / "101m_Repair.pdb", @@ -197,6 +82,7 @@ def test_registering_foldx_stability_and_sasa_with_verbose_output(): assert np.isclose(y0[:, 1], 8411.45578009).all() +@pytest.mark.poli__protein @pytest.mark.slow() def test_foldx_from_non_repaired_file(): """ @@ -219,6 +105,7 @@ def test_foldx_from_non_repaired_file(): assert np.isclose(y0, 32.6135).all() +@pytest.mark.poli__protein def test_foldx_from_repaired_file(): """ In this test, we check whether no repair is diff --git a/src/poli/tests/registry/proteins/test_foldx_rfp_lambo.py b/src/poli/tests/registry/proteins/test_foldx_rfp_lambo.py index a933c9d3..ba23706b 100644 --- a/src/poli/tests/registry/proteins/test_foldx_rfp_lambo.py +++ b/src/poli/tests/registry/proteins/test_foldx_rfp_lambo.py @@ -1,6 +1,7 @@ import pytest +@pytest.mark.poli__lambo @pytest.mark.slow() def test_foldx_rfp_lambo_runs(): import numpy as np @@ -50,7 +51,3 @@ def test_foldx_rfp_lambo_runs(): f(first_base_candidate_and_mutation), np.array([[-10591.87684184, -61.8757], [-10634.23150497, -61.5511]]), ).all() - - -if __name__ == "__main__": - test_foldx_rfp_lambo_runs() diff --git a/src/poli/tests/registry/proteins/test_rasp.py b/src/poli/tests/registry/proteins/test_rasp.py index 629df6b5..b9235e00 100644 --- a/src/poli/tests/registry/proteins/test_rasp.py +++ b/src/poli/tests/registry/proteins/test_rasp.py @@ -8,6 +8,7 @@ THIS_DIR = Path(__file__).parent.resolve() +@pytest.mark.poli__rasp def test_rasp_on_3ned_against_notebooks_results_on_rasp_env(): try: from poli.objective_repository.rasp.isolated_function import RaspIsolatedLogic @@ -49,6 +50,7 @@ def test_rasp_on_3ned_against_notebooks_results_on_rasp_env(): assert np.isclose(y[2], -0.2835593180137258, atol=1e-4) +@pytest.mark.poli__rasp def test_rasp_on_3ned_against_notebooks_results_isolated(): """ We test forceful registration of the RaSP problem. 
diff --git a/src/poli/tests/registry/test_black_box_instancing.py b/src/poli/tests/registry/test_black_box_instancing.py deleted file mode 100644 index 422cddd8..00000000 --- a/src/poli/tests/registry/test_black_box_instancing.py +++ /dev/null @@ -1,219 +0,0 @@ -from pathlib import Path - -import numpy as np -import pytest - -from poli.objective_repository import ( - AlohaBlackBox, - DockstringBlackBox, - DRD3BlackBox, - FoldXRFPLamboBlackBox, - FoldXSASABlackBox, - FoldXStabilityAndSASABlackBox, - FoldXStabilityBlackBox, - GFPCBasBlackBox, - GFPSelectionBlackBox, - LogPBlackBox, - PenalizedLogPLamboBlackBox, - QEDBlackBox, - RaspBlackBox, - SABlackBox, - SuperMarioBrosBlackBox, - ToyContinuousBlackBox, - WhiteNoiseBlackBox, -) - -TESTS_FOLDER = Path(__file__).parent.parent.resolve() - -SEED = np.random.randint(0, 1000) - -test_data = [ - ("aloha", AlohaBlackBox, {}), - ( - "dockstring", - DockstringBlackBox, - {"target_name": "drd2", "string_representation": "SMILES"}, - ), - ( - "drd3_docking", - DRD3BlackBox, - {"string_representation": "SMILES", "force_isolation": True}, - ), - ( - "foldx_rfp_lambo", - FoldXRFPLamboBlackBox, - {}, - ), - ( - "foldx_sasa", - FoldXSASABlackBox, - { - "wildtype_pdb_path": TESTS_FOLDER - / "static_files_for_tests" - / "101m_Repair.pdb", - "force_isolation": True, - }, - ), - ( - "foldx_stability", - FoldXStabilityBlackBox, - { - "wildtype_pdb_path": TESTS_FOLDER - / "static_files_for_tests" - / "101m_Repair.pdb", - "force_isolation": True, - }, - ), - ( - "foldx_stability_and_sasa", - FoldXStabilityAndSASABlackBox, - { - "wildtype_pdb_path": TESTS_FOLDER - / "static_files_for_tests" - / "101m_Repair.pdb", - "force_isolation": True, - "verbose": True, - }, - ), - ( - "gfp_cbas", - GFPCBasBlackBox, - { - "problem_type": "gp", - "force_isolation": True, - }, - ), - ( - "gfp_cbas", - GFPCBasBlackBox, - { - "problem_type": "vae", - "force_isolation": True, - }, - ), - ( - "gfp_cbas", - GFPCBasBlackBox, - { - "problem_type": "elbo", - 
"force_isolation": True, - }, - ), - ( - "gfp_select", - GFPSelectionBlackBox, - { - "force_isolation": True, - }, - ), - ( - "penalized_logp_lambo", - PenalizedLogPLamboBlackBox, - { - "force_isolation": False, - }, - ), - ( - "rasp", - RaspBlackBox, - { - "wildtype_pdb_path": TESTS_FOLDER / "static_files_for_tests" / "3ned.pdb", - "force_isolation": False, - }, - ), - ( - "rdkit_logp", - LogPBlackBox, - {}, - ), - ( - "rdkit_qed", - QEDBlackBox, - {}, - ), - ( - "rfp_foldx_stability_and_sasa", - FoldXStabilityAndSASABlackBox, - { - "wildtype_pdb_path": [ - TESTS_FOLDER / "static_files_for_tests" / folder / "wt_input_Repair.pdb" - for folder in [ - "2vad_A", - "2vae_A", - "3e5v_A", - "3ned_A", - "5lk4_A", - "6aa7_A", - ] - ], - "verbose": True, - "batch_size": 1, - "parallelize": True, - }, - ), - ( - "sa_tdc", - SABlackBox, - { - "force_isolation": True, - }, - ), - ( - "super_mario_bros", - SuperMarioBrosBlackBox, - { - "force_isolation": True, - }, - ), - ( - "toy_continuous_problem", - ToyContinuousBlackBox, - {"function_name": "ackley_function_01"}, - ), - ("white_noise", WhiteNoiseBlackBox, {}), -] - - -@pytest.mark.parametrize( - "black_box_name, black_box_class, kwargs_for_black_box", - test_data, -) -@pytest.mark.slow() -def test_instancing_a_black_box_both_ways_matches( - black_box_name, black_box_class, kwargs_for_black_box -): - from poli import create - from poli.core.util.seeding import seed_python_numpy_and_torch - - problem = create( - name=black_box_name, - seed=SEED, - **kwargs_for_black_box, - ) - x0 = problem.x0 - if black_box_name == "foldx_rfp_lambo": - x0 = x0[0].reshape(1, -1) - elif black_box_name == "gfp_select": - x0 = x0[:10] - y0 = problem.black_box(x0) - - seed_python_numpy_and_torch(SEED) - f = black_box_class(**kwargs_for_black_box) - y0_ = f(x0) - - # if problem.info.deterministic: - # TODO: ask Richard about gfp select and rfp_foldx. 
- if black_box_name == "gfp_select": - return - elif black_box_name == "rfp_foldx_stability_and_sasa": - assert np.allclose(y0[:, 0], y0_[:, 0], atol=1.0) - assert np.allclose(y0_[:, 1], y0[:, 1], atol=2.0) - else: - assert np.allclose(y0_, y0) - - -if __name__ == "__main__": - for black_box_name, black_box_class, kwargs in test_data: - test_instancing_a_black_box_both_ways_matches( - black_box_name, black_box_class, kwargs - ) diff --git a/src/poli/tests/registry/test_force_isolation.py b/src/poli/tests/registry/test_force_isolation.py index 2cfbe68e..8764e67d 100644 --- a/src/poli/tests/registry/test_force_isolation.py +++ b/src/poli/tests/registry/test_force_isolation.py @@ -12,7 +12,3 @@ def test_force_isolation_on_aloha(): ) assert isinstance(problem.black_box, objective_factory.ExternalBlackBox) - - -if __name__ == "__main__": - test_force_isolation_on_aloha() diff --git a/src/poli/tests/registry/test_instancing_black_boxes_alone.py b/src/poli/tests/registry/test_instancing_black_boxes_alone.py deleted file mode 100644 index b878d753..00000000 --- a/src/poli/tests/registry/test_instancing_black_boxes_alone.py +++ /dev/null @@ -1,11 +0,0 @@ -import numpy as np - -from poli.objective_repository import AVAILABLE_BLACK_BOXES - - -def test_instancing_black_boxes_alone(): - from poli.objective_repository import WhiteNoiseBlackBox - - f = WhiteNoiseBlackBox() - - f(np.array([["1", "2", "3"]])) diff --git a/src/poli/tests/registry/test_passing_array_of_strings.py b/src/poli/tests/registry/test_passing_array_of_strings.py index bd4e0531..fecbd7b7 100644 --- a/src/poli/tests/registry/test_passing_array_of_strings.py +++ b/src/poli/tests/registry/test_passing_array_of_strings.py @@ -5,8 +5,6 @@ import pytest -from poli.objective_repository import AVAILABLE_OBJECTIVES - # TODO: parametrize by all non-aligned blackboxes @pytest.mark.parametrize( @@ -127,20 +125,3 @@ def test_passing_array_of_strings( x_non_flat = np.array(example_non_flat_input) assert 
np.array_equal(f(x_flat), f(x_non_flat), equal_nan=True) - - -if __name__ == "__main__": - test_passing_array_of_strings( - "dockstring", - [ - ["C", ""], - ["C", "C"], - ], - [ - "C", - "CC", - ], - { - "target_name": "ABL1", - }, - ) diff --git a/src/poli/tests/registry/toy_continuous_problems/test_embedding_problems_into_higher_dims.py b/src/poli/tests/registry/toy_continuous_problems/test_embedding_problems_into_higher_dims.py index b9aa2bc6..fa63fa5f 100644 --- a/src/poli/tests/registry/toy_continuous_problems/test_embedding_problems_into_higher_dims.py +++ b/src/poli/tests/registry/toy_continuous_problems/test_embedding_problems_into_higher_dims.py @@ -48,7 +48,3 @@ def test_embed_camelback_into_high_dimensions(): f_camelback(one_x), f_camelback(another_x), ) - - -if __name__ == "__main__": - test_embed_camelback_into_high_dimensions() diff --git a/src/poli/tests/test_core_promises.py b/src/poli/tests/test_core_promises.py index db6c36bc..2c0923de 100644 --- a/src/poli/tests/test_core_promises.py +++ b/src/poli/tests/test_core_promises.py @@ -25,19 +25,3 @@ def test_creating_a_problem(): y0 = f(x0) f.terminate() - - -def test_instancing_a_black_box_that_requires_isolation(): - from poli.objective_repository.dockstring.register import DockstringBlackBox - - f = DockstringBlackBox( - target_name="DRD2", - string_representation="SMILES", - ) - - risperidone_smiles = "CC1=C(C(=O)N2CCCCC2=N1)CCN3CCC(CC3)C4=NOC5=C4C=CC(=C5)F" - - # TODO: replace for proper smiles tokenization. - x0 = np.array([list(risperidone_smiles)]) - - print(f(x0)) diff --git a/src/poli/tests/test_minimal_working_example.py b/src/poli/tests/test_minimal_working_example.py deleted file mode 100644 index 9ce0154a..00000000 --- a/src/poli/tests/test_minimal_working_example.py +++ /dev/null @@ -1,24 +0,0 @@ -def test_minimal_working_example_of_problem_creation(): - """ - Tests the minimal working example from the readme, verbatum. 
- """ - import numpy as np - - from poli import objective_factory - - white_noise_problem = objective_factory.create(name="white_noise") - f = white_noise_problem.black_box - - x = np.array([["1", "2", "3"]]) # must be of shape [b, L], in this case [1, 3]. - for _ in range(5): - print(f"f(x) = {f(x)}") - - -def test_minimal_working_example_of_black_box_instancing(): - import numpy as np - - from poli.objective_repository import WhiteNoiseBlackBox - - f = WhiteNoiseBlackBox() - x = np.array([["1", "2", "3"]]) - print(f"f(x) = {f(x)}") diff --git a/src/poli/tests/test_seeding.py b/src/poli/tests/test_seeding.py index 86cd6a7d..4b41ad4c 100644 --- a/src/poli/tests/test_seeding.py +++ b/src/poli/tests/test_seeding.py @@ -28,7 +28,3 @@ def test_seeding_in_white_noise_factory_creation(): y2 = f(x0) assert (y0 == y1).all() and not np.isclose(y0, y2).all() - - -if __name__ == "__main__": - test_seeding_in_white_noise_factory_creation() diff --git a/src/poli/tests/util/test_foldx_interface.py b/src/poli/tests/util/test_foldx_interface.py index 48e5f851..efcb4fdb 100644 --- a/src/poli/tests/util/test_foldx_interface.py +++ b/src/poli/tests/util/test_foldx_interface.py @@ -26,6 +26,7 @@ THIS_DIR = Path(__file__).parent.resolve() +@pytest.mark.poli__protein class TestFoldxInterface: wildtype_pdb_path = THIS_DIR / "3ned.pdb" tmp_path = THIS_DIR / "tmp" diff --git a/src/poli/tests/util/test_protein_utilities.py b/src/poli/tests/util/test_protein_utilities.py index fca0e4c7..3f9f5616 100644 --- a/src/poli/tests/util/test_protein_utilities.py +++ b/src/poli/tests/util/test_protein_utilities.py @@ -36,6 +36,7 @@ THIS_DIR = Path(__file__).parent.resolve() +@pytest.mark.poli__protein class TestClosestPDBFilesToMutation: wildtype_pdb_paths = [ THIS_DIR / "3ned.pdb", diff --git a/tox.ini b/tox.ini index 8b7d2c48..833cef24 100644 --- a/tox.ini +++ b/tox.ini @@ -2,6 +2,10 @@ env_list = lint poli-base-py39 + poli-dockstring-py39 + poli-tdc-py39 + poli-protein-py39 + poli-rasp-py39 
minversion = 4.10.0 [testenv] @@ -15,34 +19,15 @@ deps = commands = sh -c "rm -rf ~/.poli_objectives/*.sh" sh -c "rm -rf ~/.poli_objectives/config.rc" - sh -c 'if conda info --envs | grep -q poli__base; then echo "poli__base already exists"; else conda env create -f ./src/poli/objective_repository/aloha/environment.yml; fi' - sh -c "conda run -n poli__base python -m pip uninstall -y poli" - sh -c "conda run -n poli__base python -m pip install -e ." - sh -c 'if conda info --envs | grep -q poli__chem; then echo "poli__chem already exists"; else conda env create -f ./src/poli/objective_repository/rdkit_qed/environment.yml; fi' - sh -c "conda run -n poli__chem python -m pip uninstall -y poli" - sh -c "conda run -n poli__chem python -m pip install -e ." - sh -c 'if conda info --envs | grep -q poli__protein; then echo "poli__protein already exists"; else conda env create -f ./src/poli/objective_repository/foldx_stability/environment.yml; fi' - sh -c "conda run -n poli__protein python -m pip uninstall -y poli" - sh -c "conda run -n poli__protein python -m pip install -e ." - sh -c 'if conda info --envs | grep -q poli__dockstring; then echo "poli__dockstring already exists"; else conda env create -f ./src/poli/objective_repository/dockstring/environment.yml; fi' - sh -c "conda run -n poli__dockstring python -m pip uninstall -y poli" - sh -c "conda run -n poli__dockstring python -m pip install -e ." - sh -c 'if conda info --envs | grep -q poli__tdc; then echo "poli__tdc already exists"; else conda env create -f ./src/poli/core/chemistry/environment.yml; fi' - sh -c "conda run -n poli__tdc python -m pip uninstall -y poli" - sh -c "conda run -n poli__tdc python -m pip install -e ." 
- sh -c 'if conda info --envs | grep -q poli__lambo; then echo "poli__lambo already exists"; else conda env create -f ./src/poli/objective_repository/foldx_rfp_lambo/environment.yml; fi' - sh -c "conda run -n poli__lambo python -m pip uninstall -y poli" - sh -c "conda run -n poli__lambo python -m pip install -e ." - pytest {tty:--color=yes} -v -m "not slow" {posargs} - sh -c "rm -rf ~/.poli_objectives/*.sh" - sh -c "rm -rf ~/.poli_objectives/config.rc" [testenv:lint] description = check the code style with black deps = black + isort commands = black --check --diff . + isort --profile black --check-only src/ [testenv:poli-base-py39] description = run the tests with pytest on the base environment for poli @@ -54,3 +39,75 @@ deps= -e. commands= {[testenv]commands} + pytest {tty:--color=yes} -v -m "unmarked" {posargs} + +[testenv:poli-tdc-py39] +description = run the tests with pytest on the TDC environment for poli +basepython = python3.9 +wheel_build_env = .pkg +deps= + {[testenv]deps} + -r requirements.txt + -e. +commands= + sh -c 'if conda info --envs | grep -q poli__tdc; then echo "poli__tdc already exists"; else conda env create -f ./src/poli/core/chemistry/environment.yml; fi' + sh -c "conda run -n poli__tdc python -m pip uninstall -y poli" + sh -c "conda run -n poli__tdc python -m pip install -e ." + pytest {tty:--color=yes} -v -m "not slow and poli__tdc" {posargs} + +[testenv:poli-protein-py39] +description = run the tests with pytest on the protein environment for poli +basepython = python3.9 +wheel_build_env = .pkg +deps= + {[testenv]deps} + -r requirements.txt + -e. +commands= + sh -c 'if conda info --envs | grep -q poli__protein; then echo "poli__protein already exists"; else conda env create -f ./src/poli/objective_repository/foldx_stability/environment.yml; fi' + sh -c "conda run -n poli__protein python -m pip uninstall -y poli" + sh -c "conda run -n poli__protein python -m pip install -e ." 
+ pytest {tty:--color=yes} -v -m "not slow and poli__protein" {posargs} + +[testenv:poli-lambo-py39] +description = run the tests with pytest on the lambo environment for poli +basepython = python3.9 +wheel_build_env = .pkg +deps= + {[testenv]deps} + -r requirements.txt + -e. +commands= + sh -c 'if conda info --envs | grep -q poli__lambo; then echo "poli__lambo already exists"; else conda env create -f ./src/poli/objective_repository/foldx_rfp_lambo/environment.yml; fi' + sh -c "conda run -n poli__lambo python -m pip uninstall -y poli" + sh -c "conda run -n poli__lambo python -m pip install -e ." + pytest {tty:--color=yes} -v -m "not slow and poli__lambo" {posargs} + +[testenv:poli-dockstring-py39] +description = run the tests with pytest on the dockstring environment for poli +basepython = python3.9 +wheel_build_env = .pkg +deps= + {[testenv]deps} + -r requirements.txt + -e. +commands= + sh -c 'if conda info --envs | grep -q poli__dockstring; then echo "poli__dockstring already exists"; else conda env create -f ./src/poli/objective_repository/dockstring/environment.yml; fi' + sh -c "conda run -n poli__dockstring python -m pip uninstall -y poli" + sh -c "conda run -n poli__dockstring python -m pip install -e ." + pytest {tty:--color=yes} -v -m "not slow and poli__dockstring" {posargs} + +[testenv:poli-rasp-py39] +description = run the tests with pytest on the rasp environment for poli +basepython = python3.9 +wheel_build_env = .pkg +deps= + {[testenv]deps} + -r requirements.txt + -e. +commands= + sh -c 'if conda info --envs | grep -q poli__rasp; then echo "poli__rasp already exists"; else conda env create -f ./src/poli/objective_repository/rasp/environment.yml; fi' + sh -c "conda run -n poli__rasp python -m pip uninstall -y poli" + sh -c "conda run -n poli__rasp python -m pip install -e ." 
+ pytest {tty:--color=yes} -v -m "not slow and poli__rasp" {posargs} + From e949f6bee9ba70e0b27bbbfe4c5c5e8e42dfd170 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Gonz=C3=A1lez=20Duque?= Date: Fri, 12 Jul 2024 11:30:35 +0200 Subject: [PATCH 07/10] Update readme with black box info and add cronjobs to tests (#218) * Updates readme with links to docs of the black boxes * Updates the names of the workflows * Moves one column in the table --- .github/workflows/python-tox-testing-base.yml | 7 +++-- .../python-tox-testing-dockstring-env.yml | 7 +++-- .../python-tox-testing-lambo-env.yml | 7 +++-- .../python-tox-testing-protein-env.yml | 7 +++-- .../workflows/python-tox-testing-tdc-env.yml | 7 +++-- README.MD | 27 ++++++++++++------- 6 files changed, 42 insertions(+), 20 deletions(-) diff --git a/.github/workflows/python-tox-testing-base.yml b/.github/workflows/python-tox-testing-base.yml index 5c4ec870..17babc67 100644 --- a/.github/workflows/python-tox-testing-base.yml +++ b/.github/workflows/python-tox-testing-base.yml @@ -1,6 +1,9 @@ -name: poli base (dev, conda, python 3.9) +name: poli base (conda, py3.9) -on: [push] +on: + push: + schedule: + - cron: '0 0 * * 0' jobs: build-linux: diff --git a/.github/workflows/python-tox-testing-dockstring-env.yml b/.github/workflows/python-tox-testing-dockstring-env.yml index 3f40c42b..f9a8dc25 100644 --- a/.github/workflows/python-tox-testing-dockstring-env.yml +++ b/.github/workflows/python-tox-testing-dockstring-env.yml @@ -1,6 +1,9 @@ -name: poli dockstring (dev, conda, python 3.9) +name: poli dockstring (conda, py3.9) -on: [push] +on: + push: + schedule: + - cron: '0 0 * * 0' jobs: build-linux: diff --git a/.github/workflows/python-tox-testing-lambo-env.yml b/.github/workflows/python-tox-testing-lambo-env.yml index ed15510f..b9ba4929 100644 --- a/.github/workflows/python-tox-testing-lambo-env.yml +++ b/.github/workflows/python-tox-testing-lambo-env.yml @@ -1,6 +1,9 @@ -name: poli lambo (dev, conda, python 3.9) +name: poli 
lambo (conda, py3.9) -on: [push] +on: + push: + schedule: + - cron: '0 0 * * 0' jobs: build-linux: diff --git a/.github/workflows/python-tox-testing-protein-env.yml b/.github/workflows/python-tox-testing-protein-env.yml index 453a3f88..799cbd62 100644 --- a/.github/workflows/python-tox-testing-protein-env.yml +++ b/.github/workflows/python-tox-testing-protein-env.yml @@ -1,6 +1,9 @@ -name: poli protein (dev, conda, python 3.9) +name: poli protein (conda, py3.9) -on: [push] +on: + push: + schedule: + - cron: '0 0 * * 0' jobs: build-linux: diff --git a/.github/workflows/python-tox-testing-tdc-env.yml b/.github/workflows/python-tox-testing-tdc-env.yml index b6d1c803..77044361 100644 --- a/.github/workflows/python-tox-testing-tdc-env.yml +++ b/.github/workflows/python-tox-testing-tdc-env.yml @@ -1,6 +1,9 @@ -name: poli tdc (dev, conda, python 3.9) +name: poli tdc (conda, py3.9) -on: [push] +on: + push: + schedule: + - cron: '0 0 * * 0' jobs: build-linux: diff --git a/README.MD b/README.MD index 31d4c416..b66b09c7 100644 --- a/README.MD +++ b/README.MD @@ -1,14 +1,22 @@ -# poli 🧪, a library for discrete objective functions +# `poli` 🧪, a library for discrete objective functions -[![Testing (conda, python 3.9)](https://github.com/MachineLearningLifeScience/poli/actions/workflows/python-tox-testing-including-conda.yml/badge.svg)](https://github.com/MachineLearningLifeScience/poli/actions/workflows/python-tox-testing-including-conda.yml) -[![Link to documentation](https://img.shields.io/badge/docs-poli_docs-blue)](https://machinelearninglifescience.github.io/poli-docs/) +[![poli base (dev, conda, python 3.9)](https://github.com/MachineLearningLifeScience/poli/actions/workflows/python-tox-testing-base.yml/badge.svg)](https://github.com/MachineLearningLifeScience/poli/actions/workflows/python-tox-testing-base.yml) +[![Link to documentation](https://img.shields.io/badge/documentation-poli_docs-blue)](https://machinelearninglifescience.github.io/poli-docs/) -poli is a library 
of discrete objective functions for benchmarking optimization algorithms. Examples include: -- 🔬 **stability** of mutations from a wildtype protein (using [foldx](https://foldxsuite.crg.eu/) or [rasp](https://github.com/KULL-Centre/_2022_ML-ddG-Blaabjerg)). -- 🧪 **docking scores** of ligands to proteins (using [dockstring](https://github.com/dockstring/dockstring), [pyscreener](https://github.com/coleygroup/pyscreener) and [pytdc](https://tdcommons.ai/functions/oracles/)). -- 💊 **druglikeness** or **synthetic acccesibility** of small molecules (using [rdkit](https://github.com/rdkit/rdkit) and [pytdc](https://tdcommons.ai/functions/oracles/)). +`poli` is a library of discrete objective functions for benchmarking optimization algorithms. -Some of `poli`'s features: +## Black boxes + +| Black box | References | Tests +|----------|----------|----------| +| [Toy continuous functions (e.g. Ackley, Hartmann...)](https://machinelearninglifescience.github.io/poli-docs/using_poli/objective_repository/toy_continuous_problems.html) | [(Al-Roomi 2015)](https://www.al-roomi.org/benchmarks/unconstrained), [(Surjanovic & Bingham 2013)](https://www.sfu.ca/~ssurjano/optimization.html) | [![poli base (dev, conda, python 3.9)](https://github.com/MachineLearningLifeScience/poli/actions/workflows/python-tox-testing-base.yml/badge.svg)](https://github.com/MachineLearningLifeScience/poli/actions/workflows/python-tox-testing-base.yml) | +| Ehrlich functions | [(Stanton et al. 2024)](https://arxiv.org/abs/2407.00236) | [![poli base (dev, conda, python 3.9)](https://github.com/MachineLearningLifeScience/poli/actions/workflows/python-tox-testing-base.yml/badge.svg)](https://github.com/MachineLearningLifeScience/poli/actions/workflows/python-tox-testing-base.yml) +| [PMO/GuacaMol benchmark](https://machinelearninglifescience.github.io/poli-docs/#small-molecules) | [(Brown et al. 2019)](https://arxiv.org/abs/1811.09621), [(Gao et al. 
2022)](https://openreview.net/forum?id=yCZRdI0Y7G), [(Huang et al. 2021)](https://openreview.net/pdf?id=8nvgnORnoWr) | [![poli tdc (dev, conda, python 3.9)](https://github.com/MachineLearningLifeScience/poli/actions/workflows/python-tox-testing-tdc-env.yml/badge.svg)](https://github.com/MachineLearningLifeScience/poli/actions/workflows/python-tox-testing-tdc-env.yml) +| [Dockstring](https://machinelearninglifescience.github.io/poli-docs/using_poli/objective_repository/dockstring.html) | [(García-Ortegón et al. 2022)](https://pubs.acs.org/doi/full/10.1021/acs.jcim.1c01334) | [![poli dockstring (dev, conda, python 3.9)](https://github.com/MachineLearningLifeScience/poli/actions/workflows/python-tox-testing-dockstring-env.yml/badge.svg)](https://github.com/MachineLearningLifeScience/poli/actions/workflows/python-tox-testing-dockstring-env.yml) +| [FoldX stability and SASA](https://machinelearninglifescience.github.io/poli-docs/#proteins) | [(Schymkowitz et al. 2005)](https://academic.oup.com/nar/article/33/suppl_2/W382/2505499?login=true) | - | +| [RaSP](https://machinelearninglifescience.github.io/poli-docs/using_poli/objective_repository/RaSP.html) | [(Blaabjerg et al. 2023)](https://elifesciences.org/articles/82593) | - + +## Features - 🔲 **isolation** of black box function calls inside conda environments. Don't worry about clashes w. black box requirements, poli will create the relevant conda environments for you. - 🗒️ **logging** each black box call using observers. - A numpy interface. Inputs are `np.array`s of strings, outputs are `np.array`s of floats. 
@@ -27,8 +35,7 @@ pip install git+https://github.com/MachineLearningLifeScience/poli.git@dev To check if everything went well, you can run ```bash -$ python -c "from poli import get_problems ; print(get_problems())" -['aloha', ..., 'white_noise'] +$ python -c "from poli import create" ``` ### An example: dockstring From 8b5a3996448fd2d59739ba4ff22bef6204ccc2b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Gonz=C3=A1lez=20Duque?= Date: Thu, 18 Jul 2024 17:02:23 +0200 Subject: [PATCH 08/10] Uses default observer if none is found in the config file (#221) --- src/poli/objective_factory.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/poli/objective_factory.py b/src/poli/objective_factory.py index fba11cce..2777c403 100644 --- a/src/poli/objective_factory.py +++ b/src/poli/objective_factory.py @@ -435,6 +435,9 @@ def _instantiate_observer(observer_name: str, quiet: bool = False) -> AbstractOb The black-box function, initial value, and related information. """ + if _OBSERVER not in registry.config[_DEFAULT]: + registry.config[_DEFAULT][_OBSERVER] = _DEFAULT_OBSERVER_RUN_SCRIPT + observer_script: str = registry.config[_DEFAULT][_OBSERVER] if observer_name is not None: if observer_name != DEFAULT_OBSERVER_NAME: From 664a69b97bda02e491ac53a0c738a3014efb9bbd Mon Sep 17 00:00:00 2001 From: Richard Michael Date: Thu, 25 Jul 2024 14:41:46 +0200 Subject: [PATCH 09/10] [WIP] 207 add the work of neidhart et al as a black box (#214) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add RMF objective, logic, info, and factory * add inner_function on init, add module_to_import to inner_function get * correction shape and object types * add tests availability, isolation, batch_eval * added random state as class property * black, linting * add test batch eval * corrections: compute HD, batched results in loop, OH to int encoding * add tests seeding and expected values * add environment marker * add poli-rmf as testenv * add 
github workflow * move imports into env markers * Modifies the name of the isolated function * fix tox test commands * no conda runs but shell invoke * Uses default observer if none is found in the config file (#221) * __future__ type annotations * Add blackbox docstring * removed deprecated test --------- Co-authored-by: Richard Michael Co-authored-by: Miguel González Duque --- .../workflows/python-tox-testing-rmf-env.yml | 29 ++ pyproject.toml | 1 + src/poli/objective_repository/__init__.py | 3 + .../rmf_landscape/__init__.py | 10 + .../rmf_landscape/environment.yml | 11 + .../rmf_landscape/information.py | 15 + .../rmf_landscape/isolated_function.py | 132 +++++++++ .../rmf_landscape/register.py | 257 ++++++++++++++++++ src/poli/tests/registry/proteins/test_rmf.py | 118 ++++++++ tox.ini | 16 +- 10 files changed, 591 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/python-tox-testing-rmf-env.yml create mode 100644 src/poli/objective_repository/rmf_landscape/__init__.py create mode 100644 src/poli/objective_repository/rmf_landscape/environment.yml create mode 100644 src/poli/objective_repository/rmf_landscape/information.py create mode 100644 src/poli/objective_repository/rmf_landscape/isolated_function.py create mode 100644 src/poli/objective_repository/rmf_landscape/register.py create mode 100644 src/poli/tests/registry/proteins/test_rmf.py diff --git a/.github/workflows/python-tox-testing-rmf-env.yml b/.github/workflows/python-tox-testing-rmf-env.yml new file mode 100644 index 00000000..3d26cdc1 --- /dev/null +++ b/.github/workflows/python-tox-testing-rmf-env.yml @@ -0,0 +1,29 @@ +name: poli rmf (conda, py3.9) + +on: + push: + schedule: + - cron: '0 0 * * 0' + +jobs: + build-linux: + runs-on: ubuntu-latest + strategy: + max-parallel: 5 + + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.9 + uses: actions/setup-python@v3 + with: + python-version: '3.9' + - name: Add conda to system path + run: | + # $CONDA is an environment 
variable pointing to the root of the miniconda directory + echo $CONDA/bin >> $GITHUB_PATH + - name: Install dependencies + run: | + python -m pip install tox + - name: Test rmf-related black boxes with tox and pytest + run: | + tox -c tox.ini -e poli-rmf-py39 diff --git a/pyproject.toml b/pyproject.toml index ded40ea2..e48aff29 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,6 +42,7 @@ markers = [ "poli__tdc: marks tests that run in the poli__tdc environment", "poli__protein: marks tests that run in the poli__protein environment", "poli__rasp: marks tests that run in the poli__rasp environment", + "poli__rmf: marks tests that run in poli__rmf environment", "unmarked: All other tests, which usually run in the base environment", ] diff --git a/src/poli/objective_repository/__init__.py b/src/poli/objective_repository/__init__.py index 256c66e6..b9d4e7e5 100644 --- a/src/poli/objective_repository/__init__.py +++ b/src/poli/objective_repository/__init__.py @@ -75,6 +75,7 @@ from .rfp_foldx_stability_and_sasa.register import ( RFPFoldXStabilityAndSASAProblemFactory, ) +from .rmf_landscape.register import RMFBlackBox, RMFProblemFactory from .sa_tdc.register import SABlackBox, SAProblemFactory from .scaffold_hop.register import ScaffoldHopBlackBox, ScaffoldHopProblemFactory from .sitagliptin_mpo.register import ( @@ -138,6 +139,7 @@ "rdkit_logp": LogPProblemFactory, "rdkit_qed": QEDProblemFactory, "rfp_foldx_stability_and_sasa": RFPFoldXStabilityAndSASAProblemFactory, + "rmf_landscape": RMFProblemFactory, "sa_tdc": SAProblemFactory, "super_mario_bros": SuperMarioBrosProblemFactory, "white_noise": WhiteNoiseProblemFactory, @@ -182,6 +184,7 @@ "rdkit_logp": LogPBlackBox, "rdkit_qed": QEDBlackBox, "rfp_foldx_stability_and_sasa": FoldXStabilityAndSASABlackBox, + "rmf_landscape": RMFBlackBox, "sa_tdc": SABlackBox, "super_mario_bros": SuperMarioBrosBlackBox, "white_noise": WhiteNoiseBlackBox, diff --git a/src/poli/objective_repository/rmf_landscape/__init__.py 
b/src/poli/objective_repository/rmf_landscape/__init__.py new file mode 100644 index 00000000..70d8b601 --- /dev/null +++ b/src/poli/objective_repository/rmf_landscape/__init__.py @@ -0,0 +1,10 @@ +"""Rough Mount Fuji (RMF) fitness landscapes w/ tunable ruggedness using Numpy. +See J Neidhart, IG Szendro, J Krug + Adaptation in Tunably Rugged Fitness Landscapes: The Rough Mount Fuji Model. + Genetics 2014 . + DOI: https://doi.org/10.1534/genetics.114.167668 +See Aita et al. + Analysis of a local fitness landscape with a model of the rough Mt. Fuji-type landscape: Application to prolyl endopeptidase and thermolysin. + Biopolymers 2000 . + DOI: https://doi.org/10.1002/(SICI)1097-0282(200007)54:1<64::AID-BIP70>3.0.CO;2-R +""" diff --git a/src/poli/objective_repository/rmf_landscape/environment.yml b/src/poli/objective_repository/rmf_landscape/environment.yml new file mode 100644 index 00000000..66359bce --- /dev/null +++ b/src/poli/objective_repository/rmf_landscape/environment.yml @@ -0,0 +1,11 @@ +name: poli__rmf +channels: + - conda-forge + - defaults +dependencies: + - python=3.9 + - pip=23.2.1 + - pip: + - numpy + - "git+https://github.com/MachineLearningLifeScience/poli.git@dev" + - scipy \ No newline at end of file diff --git a/src/poli/objective_repository/rmf_landscape/information.py b/src/poli/objective_repository/rmf_landscape/information.py new file mode 100644 index 00000000..17c0d804 --- /dev/null +++ b/src/poli/objective_repository/rmf_landscape/information.py @@ -0,0 +1,15 @@ +import numpy as np + +from poli.core.black_box_information import BlackBoxInformation +from poli.core.util.proteins.defaults import AMINO_ACIDS + +rmf_info = BlackBoxInformation( + name="rmf_landscape", + max_sequence_length=np.inf, + aligned=True, + fixed_length=True, + deterministic=False, + alphabet=AMINO_ACIDS, # TODO: differentiate between AA and NA inputs? 
+ log_transform_recommended=False, + discrete=True, +) diff --git a/src/poli/objective_repository/rmf_landscape/isolated_function.py b/src/poli/objective_repository/rmf_landscape/isolated_function.py new file mode 100644 index 00000000..6ec63d1d --- /dev/null +++ b/src/poli/objective_repository/rmf_landscape/isolated_function.py @@ -0,0 +1,132 @@ +from __future__ import annotations + +import logging +from random import seed +from typing import List, Optional + +import numpy as np +from scipy.spatial.distance import hamming +from scipy.stats import genpareto + +from poli.core.abstract_isolated_function import AbstractIsolatedFunction +from poli.core.util.proteins.defaults import AMINO_ACIDS, ENCODING + + +class RMFIsolatedLogic(AbstractIsolatedFunction): + """ + RMF internal logic. + + Parameters + ---------- + wildtype : List[str] + String sequence of the reference, default: None. + c : float, optional + + alphabet : List[str] + Alphabet for the problem, by default AA list provided from poli.core.util.proteins.defaults + stochasticity: str, optional + Methods + ------- + _black_box(x, context=None) + Main black box method to compute the fitness value of x relative to the WT. + + Raises + ------ + AssertionError + If no wildtype sequence is provided. + """ + + def __init__( + self, + wildtype: List[str], + wt_val: float | None = 0.0, + c: float | None = None, + kappa: float | None = 0.1, + alphabet: List[str] | None = None, + seed: int | None = 0, + ) -> None: + """ + Initialize the RMFBlackBox object. + """ + assert wildtype is not None, ( + "Missing reference input sequence. " + "Did you forget to pass it to the create of the black box?" 
+ ) + oracle_name = "RMF" + if not isinstance(wildtype, np.ndarray): + wildtype = np.array(list(wildtype)) + self.wildtype = wildtype + self.seed = seed + if alphabet is None: + logging.info("using default alphabet AAs.") + alphabet = AMINO_ACIDS + assert all( + [aa in ENCODING.keys() for aa in wildtype] + ), "Input wildtype elements not in encoding alphabet." + self.wt_int = np.array([ENCODING.get(aa) for aa in wildtype]) + if c is None: + c = 1 / (len(alphabet) - 1) + else: + c = c + assert c >= 0, "Invalid c : c > 0 required!" + logging.info(f"setting c={c}") + # if c == 0 : uncorrelated HoC landscape (?) + self.c = c + self.kappa = kappa + self.f_0 = ( + wt_val # in case of standardized observations (around WT) assume w.l.o.g. + ) + self.alphabet = alphabet + eta_var = genpareto.stats(c, moments="v") + self.theta = c / np.sqrt(eta_var) + self.rng = np.random.default_rng(seed) + logging.info(f"landscape theta={self.theta}") + super().__init__() + + @staticmethod + def f( + f0: float, + sigma: np.ndarray, + sigma_star: np.ndarray, + c: float, + kappa: float, + rand_state, + ) -> float: + L = len(sigma) + # from [1] (2) additive term via Hamming distance and constant + # hamm_dist = hamming(sigma.flatten(), sigma_star.flatten()) # NOTE scipy HD is normalized, DON't USE + hamm_dist = np.sum(sigma != sigma_star) + # from [2] nonadd. term is single small value accroding to RV, we use [1]gen.Pareto RV instead of Gaussian + eta = genpareto.rvs(kappa, size=1, random_state=rand_state) + # NOTE [1] describes eta as 2^L i.i.d. RV vector, which does not yield a single function value + f_p = f0 + -c * hamm_dist + f_val = f_p + eta + return f_val + + def __call__(self, x: np.ndarray, context=None) -> np.ndarray: + values = [] + for sequence in x: + L = len(sequence) + assert L == self.wildtype.shape[-1], "Inconsistent length: undefined." 
+ x_int = np.array([ENCODING.get(aa) for aa in sequence]) + val = self.f( + f0=self.f_0, + sigma=x_int, + sigma_star=self.wt_int, + c=self.c, + kappa=self.kappa, + rand_state=self.rng, + ) + values.append(val) + return np.array(values).reshape(-1, 1) + + +if __name__ == "__main__": + from poli.core.registry import register_isolated_function + + register_isolated_function( + RMFIsolatedLogic, + name="rmf_landscape__isolated", + conda_environment_name="poli__rmf", + force=True, + ) diff --git a/src/poli/objective_repository/rmf_landscape/register.py b/src/poli/objective_repository/rmf_landscape/register.py new file mode 100644 index 00000000..8c5c6e34 --- /dev/null +++ b/src/poli/objective_repository/rmf_landscape/register.py @@ -0,0 +1,257 @@ +""" +Implements a (tunable) fitness landscape of the type RMF [1] for NA [1], AA [2] inputs. + +References +---------- +[1] Adaptation in Tunably Rugged Fitness Landscapes: The Rough Mount Fuji Model. + Neidhart J., Szendro I.G., and Krug, J. Genetics 198, 699-721 (2014). https://doi.org/10.1534/genetics.114.167668 +[2] Analysis of a local fitness landscape with a model of the rough Mt. Fuji-type landscape: Application to prolyl endopeptidase and thermolysin. + Aita T., Uchiyama H., et al. Biopolymers 54, 64-79 (2000). 
https://doi.org/10.1002/(SICI)1097-0282(200007)54:1<64::AID-BIP70>3.0.CO;2-R +""" + +from __future__ import annotations + +from typing import List, Optional, Union + +import numpy as np + +from poli.core.abstract_black_box import AbstractBlackBox +from poli.core.abstract_problem_factory import AbstractProblemFactory +from poli.core.black_box_information import BlackBoxInformation +from poli.core.problem import Problem +from poli.core.util.isolation.instancing import ( + get_inner_function, + instance_function_as_isolated_process, +) +from poli.core.util.seeding import seed_python_numpy_and_torch +from poli.objective_repository.rmf_landscape.information import rmf_info + + +class RMFBlackBox(AbstractBlackBox): + """ + RMF Black Box implementation. + + Parameters + ---------- + wildtype : str + The wildtype amino-acid sequence (aka reference sequence) against which all RMF values are computed against. + wt_val : float , optional + The reference value for the WT, zero if observations are standardized, else float value e.g. ddGs + c : float, optional + Constant scalar used in RMF computation, by default is the normalizing constant relative to alphabet size + kappa : float, optional + Parameterizes the generalized Pareto distribution, by default 0.1 . + Determines what type of distribution will be sampled from exponential family, Weibull, etc. + seed : int, optional + Random seed for replicability of results, by default None. + alphabet : List[str], optional + Type of alphabet of the sequences, by default Amino Acids. + Nucleic Acids possible. + batch_size : int, optional + The batch size for parallel evaluation, by default None. + parallelize : bool, optional + Flag to parallelize evaluation, by default False. + num_workers : int, optional + The number of workers for parallel evaluation, by default None. + evaluation_budget : int, optional + The evaluation budget, by default float("inf"). 
+ force_isolation : bool, optional + Run in an isolated environment and process, by default False. + """ + + def __init__( + self, + wildtype: str, + wt_val: float = 0.0, + c: float | None = None, + kappa: float = 0.1, + seed: int | None = None, + alphabet: List[str] | None = None, + batch_size: int | None = None, + parallelize: bool | None = False, + num_workers: int | None = None, + evaluation_budget: int | None = float("inf"), + force_isolation: bool = False, + ) -> None: + """ + Initialize the RMFBlackBox object. + + Parameters + ---------- + batch_size : int, optional + The batch-size for parallel evaluation, default: None. + parallelize : bool, optional + Flag to parallelize the evaluation, default: False. + num_workers : int, optional + Number of workers for parallel evaluation, default: None. + evaluation_budget : int, optional + Maximum number of evaluations, default: float("inf"). + force_isolation: bool + Run the blackbox in an isolated environment, default: False. + """ + super().__init__( + batch_size=batch_size, + parallelize=parallelize, + num_workers=num_workers, + evaluation_budget=evaluation_budget, + ) + self.wildtype = wildtype + self.wt_val = wt_val + self.c = c + self.kappa = kappa + self.alphabet = alphabet + self.seed = seed + self.force_isolation = force_isolation + inner_function = get_inner_function( # NOTE: this implicitly registers + isolated_function_name="rmf_landscape__isolated", + class_name="RMFIsolatedLogic", + module_to_import="poli.objective_repository.rmf_landscape.isolated_function", + wildtype=self.wildtype, + wt_val=self.wt_val, + c=self.c, + kappa=self.kappa, + alphabet=self.alphabet, + seed=self.seed, + force_isolation=self.force_isolation, + ) + + def _black_box(self, x: np.ndarray, context: None) -> np.ndarray: + """ + Runs the given input x provided + in the context with the RMF function and returns the + total fitness score. 
+ + Parameters + ----------- + x : np.ndarray + The input array of strings containing mutations. + context : None + The context for the black box computation. + + Returns + -------- + y: np.ndarray + The computed fitness score(s) as a numpy array. + """ + inner_function = get_inner_function( + isolated_function_name="rmf_landscape__isolated", + class_name="RMFIsolatedLogic", + module_to_import="poli.objective_repository.rmf_landscape.isolated_function", + wildtype=self.wildtype, + wt_val=self.wt_val, + c=self.c, + kappa=self.kappa, + alphabet=self.alphabet, + seed=self.seed, + force_isolation=self.force_isolation, + ) + return inner_function(x, context) + + @staticmethod + def get_black_box_info() -> BlackBoxInformation: + return rmf_info + + +class RMFProblemFactory(AbstractProblemFactory): + """ + Problem factory for the rough Mt Fuji model problem. + + Methods + ------- + get_setup_information() + returns problem setup information. + create(...) + Creates RMF problem instance with specified parameters. + """ + + def get_setup_information(self) -> BlackBoxInformation: + return rmf_info + + def create( + self, + wildtype: List[str] | str, + wt_val: float | None = 0.0, + c: float | None = None, + kappa: float = 0.1, + alphabet: List[str] | None = None, + seed: int = None, + batch_size: int = None, + parallelize: bool = False, + num_workers: int = None, + evaluation_budget: int = float("inf"), + force_isolation: bool = False, + ) -> Problem: + """ + Create a RMFBlackBox object. + + Parameters + ---------- + wildtype : List[str] | str + Reference (wild-type) sequence is pseudo-optimum on start. + wt_val : float, optional + Reference function value (standardized observations) of WT. + c : float, optional + Constant value for function value computation. + If None passed default value is regularizing 1/(len(alphabet)-1) . + kappa: float + Determines generalized Pareto continuous RV. 
+ alphabet: List[str], optional + Problem alphabet used, if None is passed default: AMINO_ACIDS. + seed : int, optional + Seed for random number generators. If None is passed, + the seeding doesn't take place. + batch_size : int, optional + Number of samples per batch for parallel computation. + parallelize : bool, optional + Flag indicating whether to parallelize the computation. + num_workers : int, optional + Number of worker processes for parallel computation. + evaluation_budget: int, optional + The maximum number of function evaluations. Default is infinity. + + Returns + ------- + problem : Problem + A problem instance containing a RMFBlackBox + function, and initial wildtypes x0. + + Raises + ------ + ValueError + If wildtype reference sequence is missing. + """ + if seed is not None: + seed_python_numpy_and_torch(seed) + + if wildtype is None: + raise ValueError("Missing reference sequence!") + + if isinstance(wildtype, str): + wildtype = list(wildtype) + + f = RMFBlackBox( + wildtype=wildtype, + wt_val=wt_val, + c=c, + kappa=kappa, + alphabet=alphabet, + seed=seed, + batch_size=batch_size, + parallelize=parallelize, + num_workers=num_workers, + evaluation_budget=evaluation_budget, + force_isolation=force_isolation, + ) + x0 = np.array(wildtype).reshape(1, len(wildtype)) + problem = Problem(f, x0) + return problem + + +if __name__ == "__main__": + from poli.core.registry import register_problem + + rmf_problem_factory = RMFProblemFactory() + register_problem( + rmf_problem_factory, + conda_environment_name="poli__rmf", + ) diff --git a/src/poli/tests/registry/proteins/test_rmf.py b/src/poli/tests/registry/proteins/test_rmf.py new file mode 100644 index 00000000..28440054 --- /dev/null +++ b/src/poli/tests/registry/proteins/test_rmf.py @@ -0,0 +1,118 @@ +import numpy as np +import pytest + +from poli import objective_factory +from poli.objective_repository import AVAILABLE_PROBLEM_FACTORIES + +ref_aa_seq = 
"HPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWNPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW" + + +@pytest.mark.poli__rmf +def test_force_isolation_rmf_landscape(): + """ + Test if we can force-register the rmf_landscape problem. + """ + problem = objective_factory.create( + name="rmf_landscape", + wildtype=ref_aa_seq, + kappa=-100, # keep noise low + force_isolation=True, + ) + f, x0 = problem.black_box, problem.x0 + y0 = f(x0) + assert np.isclose(np.round(y0), 0.0) + f.terminate() + + +@pytest.mark.poli__rmf +def test_rmf_landscape_init(): + problem = objective_factory.create( + name="rmf_landscape", + wildtype=ref_aa_seq, + kappa=-100, + ) + f, x0 = problem.black_box, problem.x0 + y0 = f(x0) + assert np.isclose(np.round(y0), 0.0) + f.terminate() + + +@pytest.mark.poli__rmf +def test_rmf_landscape_batch_eval(): + problem = objective_factory.create( + name="rmf_landscape", + wildtype=ref_aa_seq, + ) + N = 10 + f, x0 = problem.black_box, problem.x0 + y0 = f(x0) + x_t = [] + seq_b = x0.copy() + seq_b[0, 1] = "Y" + x_t = np.vstack([seq_b for _ in range(N)]) + assert x_t.shape[0] == N + yt = f(x_t) + assert yt.shape[0] == N + f.terminate() + + +@pytest.mark.poli__rmf +@pytest.mark.parametrize("seed", [1, 2, 3]) +def test_rmf_seed_consistent(seed: int): + mutation_seq = list(ref_aa_seq) + mutation_seq[int(len(mutation_seq) / 2)] = "A" + mutation_seq[int(len(mutation_seq) / 4)] = "H" + mutation_seq = np.array(mutation_seq)[None, :] + problem_a = objective_factory.create( + name="rmf_landscape", + wildtype=ref_aa_seq, + seed=seed, + ) + problem_b = objective_factory.create( + name="rmf_landscape", wildtype=ref_aa_seq, seed=seed + ) + f_a, x0_a = problem_a.black_box, problem_a.x0 + y0_a = f_a(x0_a) + f_b, x0_b = problem_b.black_box, problem_b.x0 + y0_b = f_b(x0_b) + + y1_a = f_a(mutation_seq) + y1_b = 
f_b(mutation_seq) + # test equalities + assert all([x_a == x_b for x_a, x_b in zip(x0_a[0], x0_b[0])]) + assert y0_a == y0_b + assert y1_a == y1_b # value for mutated sequences equal + f_a.terminate() + f_b.terminate() + + +@pytest.mark.poli__rmf +@pytest.mark.parametrize("n_mutations", [1, 2, 3]) +def test_rmf_num_mutations_expected_val(n_mutations: int): + from scipy.stats import genpareto + + SEED = 1 + mutation_seq = list(ref_aa_seq) + for m in range(n_mutations): + mutation_seq[int(len(mutation_seq) / 2) - m] = "Y" + mutation_seq = np.array(mutation_seq)[None, :] + problem = objective_factory.create( + name="rmf_landscape", + kappa=-100, # set kappa <0 for sampling values close to zero + c=1, # set constant to one s.t. number mutations negative additive + wildtype=ref_aa_seq, + seed=SEED, + ) + + f, x0 = problem.black_box, problem.x0 + y0 = f(x0) + y1 = f(mutation_seq) + + rnd_state = np.random.default_rng(SEED) + ref_noise_0 = genpareto.rvs(f.kappa, size=1, random_state=rnd_state) + ref_noise_1 = genpareto.rvs(f.kappa, size=1, random_state=rnd_state) + + # black-box value minus noisy component should be approximately mutational distance if c==1 + assert np.isclose(np.round(y0 - ref_noise_0), 0) + assert np.isclose(np.round(y1 - ref_noise_1), -n_mutations) + f.terminate() diff --git a/tox.ini b/tox.ini index 833cef24..3cfab74c 100644 --- a/tox.ini +++ b/tox.ini @@ -98,7 +98,7 @@ commands= pytest {tty:--color=yes} -v -m "not slow and poli__dockstring" {posargs} [testenv:poli-rasp-py39] -description = run the tests with pytest on the dockstring environment for poli +description = run the tests with pytest on the RaSP environment for poli basepython = python3.9 wheel_build_env = .pkg deps= @@ -111,3 +111,17 @@ commands= sh -c "conda run -n poli__rasp python -m pip install -e ." 
pytest {tty:--color=yes} -v -m "not slow and poli__rasp" {posargs} +[testenv:poli-rmf-py39] +description = run the tests with pytest on the dockstring environment for poli +basepython = python3.9 +wheel_build_env = .pkg +deps= + {[testenv]deps} + -r requirements.txt + -e. +commands= + sh -c 'if conda info --envs | grep -q poli__rmf; then echo "poli__rmf already exists"; else conda env create -f ./src/poli/objective_repository/rmf_landscape/environment.yml; fi' + sh -c "conda run -n poli__rmf python -m pip uninstall -y poli" + sh -c "conda run -n poli__rmf python -m pip install -e ." + sh -c "conda run -n poli__rmf pip install pytest" + sh -c "conda run -n poli__rmf pytest -v -m 'not slow and poli__rmf'" \ No newline at end of file From 2b4142a30e81321f14a64cfe9bb1cf1d2a5a7d01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miguel=20Gonz=C3=A1lez=20Duque?= Date: Mon, 29 Jul 2024 11:16:21 +0200 Subject: [PATCH 10/10] Lints with `ruff` (#226) * Runs the ruff linter * Adds ruff to the pre-commit hooks * Runs ruff and adds it to the linting checks * Fixes a small error on tox's use of ruff * Fixes an erroneous import in a test --- .pre-commit-config.yaml | 7 +++ .../example_logging_rdkit_qed_using_wandb.py | 1 - .../mlflow_observer.py | 2 - .../registering_aloha.py | 1 - pyproject.toml | 3 ++ src/poli/__init__.py | 2 + src/poli/benchmarks/__init__.py | 8 +++ src/poli/core/abstract_black_box.py | 1 - src/poli/core/abstract_problem_factory.py | 5 -- .../core/chemistry/tdc_isolated_function.py | 2 +- src/poli/core/multi_objective_black_box.py | 1 - src/poli/core/problem_setup_information.py | 2 - .../core/proteins/foldx_isolated_function.py | 2 - src/poli/core/registry.py | 2 +- src/poli/core/util/alignment/__init__.py | 2 + src/poli/core/util/batch/__init__.py | 2 + .../core/util/chemistry/string_to_molecule.py | 4 +- src/poli/core/util/files/__init__.py | 2 + .../process_wrapper.py | 6 +-- .../core/util/isolation/external_black_box.py | 2 +- 
src/poli/core/util/isolation/instancing.py | 4 +- .../objective_management/make_run_script.py | 1 - src/poli/core/util/observer_wrapper.py | 5 +- src/poli/core/util/proteins/rasp/__init__.py | 2 + .../proteins/rasp/inner_rasp/PrismData.py | 50 +++++++++---------- .../proteins/rasp/inner_rasp/cavity_model.py | 6 +-- .../util/proteins/rasp/inner_rasp/helpers.py | 24 +++------ .../pdb_parser_scripts/clean_pdb.py | 2 - .../extract_environments.py | 1 - .../proteins/rasp/inner_rasp/run_pipeline.py | 14 +----- .../proteins/rasp/inner_rasp/visualization.py | 1 - src/poli/core/util/seeding/__init__.py | 2 + src/poli/objective_factory.py | 10 ++-- .../objective_repository/aloha/register.py | 1 - .../dockstring/isolated_function.py | 9 +--- .../dockstring/register.py | 5 +- .../drd3_docking/register.py | 1 - .../foldx_rfp_lambo/__init__.py | 10 ++-- .../foldx_rfp_lambo/isolated_function.py | 1 - .../gfp_cbas/cbas_wrapper.py | 2 - .../objective_repository/gfp_cbas/gfp_gp.py | 1 - .../gfp_cbas/isolated_function.py | 2 +- .../objective_repository/gfp_cbas/register.py | 2 +- .../gfp_select/isolated_function.py | 6 --- .../gfp_select/register.py | 2 - .../penalized_logp_lambo/isolated_function.py | 2 +- .../objective_repository/rasp/register.py | 2 - .../rdkit_logp/register.py | 2 +- .../rdkit_qed/register.py | 3 +- .../rmf_landscape/isolated_function.py | 6 +-- .../rmf_landscape/register.py | 13 ++--- .../objective_repository/sa_tdc/register.py | 1 - .../super_mario_bros/register.py | 1 - .../toy_continuous_problem/register.py | 7 +-- .../white_noise/register.py | 2 - src/poli/repository.py | 2 +- .../benchmarks/test_benchmark_creation.py | 4 +- .../basic_objectives/test_basic_objectives.py | 2 +- .../chemistry/test_chemistry_objectives.py | 9 ++-- src/poli/tests/registry/proteins/test_rasp.py | 5 -- src/poli/tests/registry/proteins/test_rmf.py | 3 +- .../test_basic_loop_without_create.py | 4 +- ...t_instancing_of_toy_continuous_problems.py | 2 +- 
.../toy_discrete_problems/test_ehrlich.py | 4 +- src/poli/tests/test_core_promises.py | 5 +- src/poli/tests/util/test_foldx_interface.py | 4 +- src/poli/tests/util/test_protein_utilities.py | 1 - tox.ini | 2 + 68 files changed, 122 insertions(+), 185 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3a292f1d..fa7092dd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -14,3 +14,10 @@ repos: rev: 5.13.2 hooks: - id: isort +- repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.5.5 + hooks: + # Run the linter. + - id: ruff + args: [ --fix ] \ No newline at end of file diff --git a/examples/observers/adding_a_wandb_observer/example_logging_rdkit_qed_using_wandb.py b/examples/observers/adding_a_wandb_observer/example_logging_rdkit_qed_using_wandb.py index 9335e4aa..f1cde9ab 100644 --- a/examples/observers/adding_a_wandb_observer/example_logging_rdkit_qed_using_wandb.py +++ b/examples/observers/adding_a_wandb_observer/example_logging_rdkit_qed_using_wandb.py @@ -11,7 +11,6 @@ import numpy as np from wandb_observer import WandbObserver -from poli.core.problem import Problem from poli.objective_repository import QEDProblemFactory THIS_DIR = Path(__file__).parent.resolve() diff --git a/examples/observers/adding_an_mlflow_observer/mlflow_observer.py b/examples/observers/adding_an_mlflow_observer/mlflow_observer.py index 3a8e6248..a5f56b3d 100644 --- a/examples/observers/adding_an_mlflow_observer/mlflow_observer.py +++ b/examples/observers/adding_an_mlflow_observer/mlflow_observer.py @@ -61,8 +61,6 @@ def initialize_observer( } ) - mlflow.log_param("x0", x0) - mlflow.log_param("y0", y0) mlflow.log_param("seed", seed) def observe(self, x: np.ndarray, y: np.ndarray, context=None) -> None: diff --git a/examples/the_basics/a_simple_objective_function_registration/registering_aloha.py b/examples/the_basics/a_simple_objective_function_registration/registering_aloha.py index eb2675d3..19c722c4 100644 --- 
a/examples/the_basics/a_simple_objective_function_registration/registering_aloha.py +++ b/examples/the_basics/a_simple_objective_function_registration/registering_aloha.py @@ -6,7 +6,6 @@ """ from string import ascii_uppercase -from typing import Tuple import numpy as np diff --git a/pyproject.toml b/pyproject.toml index e48aff29..0b8d8e04 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,3 +48,6 @@ markers = [ [tool.isort] profile = "black" + +[tool.ruff] +exclude = ["src/poli/core/util/proteins/rasp/inner_rasp", "src/poli/objective_repository/gfp_cbas"] \ No newline at end of file diff --git a/src/poli/__init__.py b/src/poli/__init__.py index edca0642..6bb5b772 100644 --- a/src/poli/__init__.py +++ b/src/poli/__init__.py @@ -6,3 +6,5 @@ # from .core import get_problems from .objective_factory import create from .objective_repository import get_problems + +__all__ = ["create", "get_problems", "instance_function_as_isolated_process"] diff --git a/src/poli/benchmarks/__init__.py b/src/poli/benchmarks/__init__.py index 561558af..6496ac22 100644 --- a/src/poli/benchmarks/__init__.py +++ b/src/poli/benchmarks/__init__.py @@ -5,3 +5,11 @@ EmbeddedHartmann6D, ToyContinuousFunctionsBenchmark, ) + +__all__ = [ + "GuacaMolGoalDirectedBenchmark", + "PMOBenchmark", + "ToyContinuousFunctionsBenchmark", + "EmbeddedBranin2D", + "EmbeddedHartmann6D", +] diff --git a/src/poli/core/abstract_black_box.py b/src/poli/core/abstract_black_box.py index a3094d26..f4899fad 100644 --- a/src/poli/core/abstract_black_box.py +++ b/src/poli/core/abstract_black_box.py @@ -9,7 +9,6 @@ from poli.core.black_box_information import BlackBoxInformation from poli.core.exceptions import BudgetExhaustedException -from poli.core.problem_setup_information import ProblemSetupInformation from poli.core.util.abstract_observer import AbstractObserver from poli.core.util.alignment import is_aligned_input from poli.core.util.batch import batched diff --git a/src/poli/core/abstract_problem_factory.py 
b/src/poli/core/abstract_problem_factory.py index 12b967e1..ca5226ba 100644 --- a/src/poli/core/abstract_problem_factory.py +++ b/src/poli/core/abstract_problem_factory.py @@ -1,10 +1,5 @@ """This module implements the abstract problem factory.""" -from typing import Tuple - -import numpy as np - -from poli.core.abstract_black_box import AbstractBlackBox from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem diff --git a/src/poli/core/chemistry/tdc_isolated_function.py b/src/poli/core/chemistry/tdc_isolated_function.py index 6c4ca65c..52a23eff 100644 --- a/src/poli/core/chemistry/tdc_isolated_function.py +++ b/src/poli/core/chemistry/tdc_isolated_function.py @@ -82,7 +82,7 @@ def __call__(self, x, context=None): scores : array-like An array of oracle scores computed for each input molecule. """ - if not x.dtype.kind in ["U", "S"]: + if x.dtype.kind not in ["U", "S"]: raise ValueError( f"We expect x to be an array of strings, but we got {x.dtype}" ) diff --git a/src/poli/core/multi_objective_black_box.py b/src/poli/core/multi_objective_black_box.py index 4630004e..17f05bc3 100644 --- a/src/poli/core/multi_objective_black_box.py +++ b/src/poli/core/multi_objective_black_box.py @@ -10,7 +10,6 @@ from poli.core.abstract_black_box import AbstractBlackBox from poli.core.black_box_information import BlackBoxInformation -from poli.core.problem_setup_information import ProblemSetupInformation class MultiObjectiveBlackBox(AbstractBlackBox): diff --git a/src/poli/core/problem_setup_information.py b/src/poli/core/problem_setup_information.py index 3897fe53..c934fb4a 100644 --- a/src/poli/core/problem_setup_information.py +++ b/src/poli/core/problem_setup_information.py @@ -2,8 +2,6 @@ Implements the problem setup information, which contains the problem information (e.g. alphabet, sequence length...). 
""" -import numpy as np - from poli.core.black_box_information import BlackBoxInformation diff --git a/src/poli/core/proteins/foldx_isolated_function.py b/src/poli/core/proteins/foldx_isolated_function.py index 6fcb0991..2738be3e 100644 --- a/src/poli/core/proteins/foldx_isolated_function.py +++ b/src/poli/core/proteins/foldx_isolated_function.py @@ -1,4 +1,3 @@ -from multiprocessing import cpu_count from pathlib import Path from time import time from typing import List, Union @@ -7,7 +6,6 @@ import numpy as np from poli.core.abstract_isolated_function import AbstractIsolatedFunction -from poli.core.problem_setup_information import ProblemSetupInformation from poli.core.util.proteins.foldx import FoldxInterface from poli.core.util.proteins.pdb_parsing import ( parse_pdb_as_residue_strings, diff --git a/src/poli/core/registry.py b/src/poli/core/registry.py index 91948a56..f30b2d54 100644 --- a/src/poli/core/registry.py +++ b/src/poli/core/registry.py @@ -5,7 +5,7 @@ import subprocess import warnings from pathlib import Path -from typing import Dict, List, Type, Union +from typing import List, Type, Union from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_isolated_function import AbstractIsolatedFunction diff --git a/src/poli/core/util/alignment/__init__.py b/src/poli/core/util/alignment/__init__.py index e59a43fc..5dd21732 100644 --- a/src/poli/core/util/alignment/__init__.py +++ b/src/poli/core/util/alignment/__init__.py @@ -1 +1,3 @@ from .is_aligned import is_aligned_input + +__all__ = ["is_aligned_input"] diff --git a/src/poli/core/util/batch/__init__.py b/src/poli/core/util/batch/__init__.py index f5795a5c..84d65ac6 100644 --- a/src/poli/core/util/batch/__init__.py +++ b/src/poli/core/util/batch/__init__.py @@ -1,3 +1,5 @@ """Utility functions for batching data.""" from .batch_input import batched + +__all__ = ["batched"] diff --git a/src/poli/core/util/chemistry/string_to_molecule.py 
b/src/poli/core/util/chemistry/string_to_molecule.py index fcd914ae..fbfaf229 100644 --- a/src/poli/core/util/chemistry/string_to_molecule.py +++ b/src/poli/core/util/chemistry/string_to_molecule.py @@ -40,7 +40,7 @@ def translate_smiles_to_selfies( selfies_strings.append(sf.encoder(smile)) except sf.EncoderError: if strict: - raise ValueError(f"Failed to encode SMILES to SELFIES.") + raise ValueError("Failed to encode SMILES to SELFIES.") else: selfies_strings.append(None) @@ -78,7 +78,7 @@ def translate_selfies_to_smiles( smiles_strings.append(sf.decoder(selfies)) except sf.DecoderError: if strict: - raise ValueError(f"Failed to decode SELFIES to SMILES.") + raise ValueError("Failed to decode SELFIES to SMILES.") else: smiles_strings.append(None) diff --git a/src/poli/core/util/files/__init__.py b/src/poli/core/util/files/__init__.py index 524872bf..cc2168a4 100644 --- a/src/poli/core/util/files/__init__.py +++ b/src/poli/core/util/files/__init__.py @@ -1,3 +1,5 @@ """Utilities for downloading files from GitHub repositories.""" from .download_files_from_github import download_file_from_github_repository + +__all__ = ["download_file_from_github_repository"] diff --git a/src/poli/core/util/inter_process_communication/process_wrapper.py b/src/poli/core/util/inter_process_communication/process_wrapper.py index fbc4777a..590cd5e3 100644 --- a/src/poli/core/util/inter_process_communication/process_wrapper.py +++ b/src/poli/core/util/inter_process_communication/process_wrapper.py @@ -47,14 +47,14 @@ def get_connection(port: int, password: str) -> Client: # if we manage to establish a connection we exit the function return Client(address, authkey=password.encode()) # maybe the host process isn't ready yet - except EOFError as e: + except EOFError: pass - except ConnectionRefusedError as e: + except ConnectionRefusedError: pass retries -= 1 # when we get here, e must have been instantiated logging.fatal("Could not connect to host process.") - raise e + raise 
ConnectionError("Could not connect to host process.") class ProcessWrapper: diff --git a/src/poli/core/util/isolation/external_black_box.py b/src/poli/core/util/isolation/external_black_box.py index c534501a..9ba06e7f 100644 --- a/src/poli/core/util/isolation/external_black_box.py +++ b/src/poli/core/util/isolation/external_black_box.py @@ -93,7 +93,7 @@ def terminate(self): try: self.observer.finish() self.observer = None - except: + except Exception: pass def __getattr__(self, __name: str) -> Any: diff --git a/src/poli/core/util/isolation/instancing.py b/src/poli/core/util/isolation/instancing.py index 1d5d16b2..dc6d7bd3 100644 --- a/src/poli/core/util/isolation/instancing.py +++ b/src/poli/core/util/isolation/instancing.py @@ -233,13 +233,13 @@ def register_isolated_function_if_available( # Register problem if name == "tdc__isolated": logging.debug( - f"poli 🧪: Registered the isolated function from the repository." + "poli 🧪: Registered the isolated function from the repository." ) __register_isolated_function_from_core(name, quiet=quiet) config = load_config() else: logging.debug( - f"poli 🧪: Registered the isolated function from the repository." + "poli 🧪: Registered the isolated function from the repository." 
) __register_isolated_function_from_repository(name, quiet=quiet) # Refresh the config diff --git a/src/poli/core/util/objective_management/make_run_script.py b/src/poli/core/util/objective_management/make_run_script.py index ad380f85..f66f09c3 100644 --- a/src/poli/core/util/objective_management/make_run_script.py +++ b/src/poli/core/util/objective_management/make_run_script.py @@ -10,7 +10,6 @@ from typing import List, Type, Union from poli import external_isolated_function_script, external_problem_factory_script -from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_isolated_function import AbstractIsolatedFunction from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.util import observer_wrapper diff --git a/src/poli/core/util/observer_wrapper.py b/src/poli/core/util/observer_wrapper.py index 399a4488..527e74b5 100644 --- a/src/poli/core/util/observer_wrapper.py +++ b/src/poli/core/util/observer_wrapper.py @@ -7,10 +7,7 @@ from poli.core.util.abstract_observer import AbstractObserver from poli.core.util.inter_process_communication.process_wrapper import get_connection -from poli.external_problem_factory_script import ( - dynamically_instantiate, - parse_factory_kwargs, -) +from poli.external_problem_factory_script import dynamically_instantiate def start_observer_process(observer_name, port: int, password: str): diff --git a/src/poli/core/util/proteins/rasp/__init__.py b/src/poli/core/util/proteins/rasp/__init__.py index 9dfd941a..792606aa 100644 --- a/src/poli/core/util/proteins/rasp/__init__.py +++ b/src/poli/core/util/proteins/rasp/__init__.py @@ -2,3 +2,5 @@ from .load_models import load_cavity_and_downstream_models from .rasp_interface import RaspInterface + +__all__ = ["load_cavity_and_downstream_models", "RaspInterface"] diff --git a/src/poli/core/util/proteins/rasp/inner_rasp/PrismData.py b/src/poli/core/util/proteins/rasp/inner_rasp/PrismData.py index 2943a94b..2c4e053e 100644 --- 
a/src/poli/core/util/proteins/rasp/inner_rasp/PrismData.py +++ b/src/poli/core/util/proteins/rasp/inner_rasp/PrismData.py @@ -23,7 +23,7 @@ import numpy as np import pandas as pd import yaml -from Bio import Seq, SeqIO, SeqRecord, SubsMat, pairwise2 +from Bio import Seq, SeqIO, SeqRecord, pairwise2 from Bio.SubsMat import MatrixInfo @@ -224,24 +224,24 @@ def read_header(self, filename, verbose=0): def check_header(self, header): """Check a header for fields required by all data files""" - if not "version" in header.keys(): + if "version" not in header.keys(): raise PrismFormatError("Header has no 'version' field") - if not "protein" in header.keys(): + if "protein" not in header.keys(): raise PrismFormatError("Header has no 'protein' field") - if not "name" in header["protein"].keys(): + if "name" not in header["protein"].keys(): raise PrismFormatError("Header has no 'protein: name' field") - if not "sequence" in header["protein"].keys(): + if "sequence" not in header["protein"].keys(): raise PrismFormatError("Header has no 'protein: sequence' field") - if not "uniprot" in header["protein"].keys(): + if "uniprot" not in header["protein"].keys(): raise PrismFormatError("Header has no 'protein: uniprot' field") if "first_residue_number" in header["protein"].keys(): if int(header["protein"]["first_residue_number"]) < 0: raise PrismFormatError("First residue number must be non-negative") - if not "columns" in header.keys(): + if "columns" not in header.keys(): raise PrismFormatError("Header has no 'columns' field") if "filename" in header.keys(): data_type = header["filename"].split("_")[1] - if not data_type.lower() in header.keys(): + if data_type.lower() not in header.keys(): raise PrismFormatError( "Header has no '%s' field but filename indicates this data type" % (data_type) @@ -280,9 +280,9 @@ def __merge_header_fields(self, header_list, mode="union"): def update_keys(key_dic, dic): """Update key_dic with keys from dic recursively""" for key in dic.keys(): - if 
not key in key_dic.keys(): + if key not in key_dic.keys(): key_dic[key] = {} - if type(dic[key]) == dict: + if isinstance(dic[key], dict): update_keys(key_dic[key], dic[key]) # Read all header keys @@ -330,7 +330,7 @@ def __dump_header_csv(self, filename, header_list): for header in header_list: row = [] for key in common_header_keys.keys(): - if not key in header: + if key not in header: row += [""] * np.max([1, len(common_header_keys[key])]) elif len(common_header_keys[key].keys()) == 0: row += [header[key]] @@ -405,7 +405,7 @@ def seq_from_data(self): Data is assumed to have index columns resi and aa_ref """ - if not "aa_ref" in self.dataframe.columns: + if "aa_ref" not in self.dataframe.columns: self.add_index_columns() n_res = 0 @@ -520,12 +520,12 @@ def check_column_names(self, verbose=0): 1: ] # first column is data specific and not in header for cn in data_colnames: - if not cn in meta_colnames: + if cn not in meta_colnames: raise PrismFormatError( "Could not find column name '%s' in header" % (cn) ) for cn in meta_colnames: - if not cn in data_colnames: + if cn not in data_colnames: raise PrismFormatError( "Could not find header column name '%s' in data" % (cn) ) @@ -616,7 +616,7 @@ def get_var_into_aa(self, aa, multimutant_mode="any"): - exclude : Only return single mutants """ # Check argument - if not multimutant_mode in ["any", "all", "exclude"]: + if multimutant_mode not in ["any", "all", "exclude"]: raise ValueError( "Function get_var_into_aa argument multimutant_mode must be 'any', 'all' or 'exclude'" ) @@ -649,7 +649,7 @@ def get_var_from_aa(self, aa, multimutant_mode="any"): - exclude : Only return single mutants """ # Check argument - if not multimutant_mode in ["any", "all", "exclude"]: + if multimutant_mode not in ["any", "all", "exclude"]: raise ValueError( "Function get_var_from_aa argument multimutant_mode must be 'any', 'all' or 'exclude'" ) @@ -682,7 +682,7 @@ def get_var_at_pos(self, target_resi, mode="any"): - exact : Substitutions at 
all given position and no others """ # Check argument mode - if not mode in ["any", "all", "exact"]: + if mode not in ["any", "all", "exact"]: raise ValueError( "Function get_var_from_aa argument mode must be 'any', 'all' or 'exact'" ) @@ -933,7 +933,7 @@ def strip_all(s): # Variant width if "width" in self.metadata["variants"].keys(): if recalc_variants["width"] == "single mutants": - if not strip_all(self.metadata["variants"]["width"]) in [ + if strip_all(self.metadata["variants"]["width"]) not in [ "singlemutants", "singlemutant", "singlemut", @@ -949,7 +949,7 @@ def strip_all(s): elif verbose > 0: print("WARNING: " + s) elif recalc_variants["width"] == "multi mutants": - if not strip_all(self.metadata["variants"]["width"]) in [ + if strip_all(self.metadata["variants"]["width"]) not in [ "multimutants", "multimutant", "multimut", @@ -965,7 +965,7 @@ def strip_all(s): elif verbose > 0: print("WARNING: " + s) elif recalc_variants["width"] == "single and double mutants": - if not strip_all(self.metadata["variants"]["width"]) in [ + if strip_all(self.metadata["variants"]["width"]) not in [ "singleanddoublemutants", "singleanddoublemutant", "singleanddouble", @@ -1270,7 +1270,7 @@ def to_new_reference( resi_rm = [] resi_shift = np.full(n_res_data, resi_shift_init) aa_change = {} - if not target_seq is None: + if target_seq is not None: if not PrismParser.is_aa_one_nat(None, target_seq, "X"): raise ValueError( "Argument target_seq to VariantData.to_new_reference must be a single-letter amino acid string (or None)" @@ -1406,7 +1406,7 @@ def to_new_reference( ) ) - if not first_resn is None: + if first_resn is not None: self.metadata["protein"]["first_residue_number"] = first_resn assert int(self.metadata["protein"]["first_residue_number"]) == first_resn @@ -1522,7 +1522,7 @@ def merge( **kwargs : keyword arguments Passed to to_new_reference function """ - if not merge in ["left", "outer", "inner"]: + if merge not in ["left", "outer", "inner"]: raise ValueError("Allowed 
merge arguments are left, outer or inner") merged_data = self.copy() @@ -1536,7 +1536,7 @@ def merge( if target_seq is None: # Make from meta data, variant residue numbers will match the index of this target_seq = self.metadata["protein"]["sequence"] - if not first_resn is None: + if first_resn is not None: raise ValueError( "merge argument first_resn can only be set if target_seq != None\n" + "Use VariantData.to_new_reference to only shift residue numbering" @@ -1815,7 +1815,7 @@ def merge( ): record = None for r in SeqIO.parse(args.target_seq, "fasta"): - if not record is None: + if record is not None: # if args.verbose > 0: print( "WARNING: Only using the first sequence record in %s" diff --git a/src/poli/core/util/proteins/rasp/inner_rasp/cavity_model.py b/src/poli/core/util/proteins/rasp/inner_rasp/cavity_model.py index bb433b3c..7db4f99e 100644 --- a/src/poli/core/util/proteins/rasp/inner_rasp/cavity_model.py +++ b/src/poli/core/util/proteins/rasp/inner_rasp/cavity_model.py @@ -1,12 +1,8 @@ -import glob import os -import random -from typing import Callable, List, Union import numpy as np -import pandas as pd import torch -from torch.utils.data import DataLoader, Dataset +from torch.utils.data import Dataset __all__ = [ "ResidueEnvironment", diff --git a/src/poli/core/util/proteins/rasp/inner_rasp/helpers.py b/src/poli/core/util/proteins/rasp/inner_rasp/helpers.py index a8608df0..d5e6428e 100644 --- a/src/poli/core/util/proteins/rasp/inner_rasp/helpers.py +++ b/src/poli/core/util/proteins/rasp/inner_rasp/helpers.py @@ -1,30 +1,18 @@ import datetime -import glob -import itertools import os import pickle import random -import sys from pathlib import Path -from typing import Dict, List, Union import numpy as np import pandas as pd import pytz import torch -from Bio.PDB.Polypeptide import index_to_one, one_to_index +from Bio.PDB.Polypeptide import one_to_index from scipy.stats import pearsonr -from torch.nn.functional import softmax -from torch.utils.data import 
DataLoader, Dataset - -from .cavity_model import ( - CavityModel, - DDGDataset, - DDGToTensor, - DownstreamModel, - ResidueEnvironmentsDataset, - ToTensor, -) +from torch.utils.data import DataLoader + +from .cavity_model import DDGDataset, DDGToTensor, ResidueEnvironmentsDataset, ToTensor from .PrismData import PrismParser, VariantData from .visualization import learning_curve_cavity, learning_curve_ds @@ -201,7 +189,7 @@ def train_loop( else: patience += 1 if patience > PATIENCE_CUTOFF: - print(f"Early stopping activated.") + print("Early stopping activated.") break learning_curve_cavity(acc_val_list, acc_train_list, loss_train_list) @@ -220,7 +208,7 @@ def populate_dfs_with_resenvs(ddg_data, resenv_dataset): """ print( "Dropping data points where residue is not defined in structure " - f"or due to missing parsed pdb file" + "or due to missing parsed pdb file" ) # Add wt residue environments to standard ddg data dataframes diff --git a/src/poli/core/util/proteins/rasp/inner_rasp/pdb_parser_scripts/clean_pdb.py b/src/poli/core/util/proteins/rasp/inner_rasp/pdb_parser_scripts/clean_pdb.py index 125aeb59..e5be8013 100644 --- a/src/poli/core/util/proteins/rasp/inner_rasp/pdb_parser_scripts/clean_pdb.py +++ b/src/poli/core/util/proteins/rasp/inner_rasp/pdb_parser_scripts/clean_pdb.py @@ -1,10 +1,8 @@ import argparse import os import subprocess -import sys import tempfile import time -from io import BytesIO, StringIO from pathlib import Path import Bio.PDB diff --git a/src/poli/core/util/proteins/rasp/inner_rasp/pdb_parser_scripts/extract_environments.py b/src/poli/core/util/proteins/rasp/inner_rasp/pdb_parser_scripts/extract_environments.py index 7ec2cfff..955d6420 100644 --- a/src/poli/core/util/proteins/rasp/inner_rasp/pdb_parser_scripts/extract_environments.py +++ b/src/poli/core/util/proteins/rasp/inner_rasp/pdb_parser_scripts/extract_environments.py @@ -1,5 +1,4 @@ import argparse -import enum import os import sys import time diff --git 
a/src/poli/core/util/proteins/rasp/inner_rasp/run_pipeline.py b/src/poli/core/util/proteins/rasp/inner_rasp/run_pipeline.py index 1ed54134..5860e9df 100644 --- a/src/poli/core/util/proteins/rasp/inner_rasp/run_pipeline.py +++ b/src/poli/core/util/proteins/rasp/inner_rasp/run_pipeline.py @@ -1,35 +1,25 @@ import glob import os -import pathlib import random import subprocess -import sys import numpy as np import pandas as pd import torch from Bio.PDB.Polypeptide import index_to_one -from cavity_model import ( - CavityModel, - DownstreamModel, - ResidueEnvironment, - ResidueEnvironmentsDataset, -) +from cavity_model import CavityModel, DownstreamModel, ResidueEnvironmentsDataset from helpers import ( compute_pdb_combo_corrs, ds_pred, ds_train_val, fermi_transform, - get_ddg_dataloader, init_lin_weights, - inverse_fermi_transform, populate_dfs_with_resenvs, remove_disulfides, train_loop, train_val_split_cavity, train_val_split_ds, ) -from torch.utils.data import DataLoader, Dataset from visualization import ( hist_plot_all, homology_plot, @@ -67,7 +57,7 @@ def main(): # Pre-process all protein structures - print(f"Pre-processing PDBs ...") + print("Pre-processing PDBs ...") pdb_dirs = [ f"{os.path.dirname(os.getcwd())}/data/train/cavity/structure/", f"{os.path.dirname(os.getcwd())}/data/train/downstream/structure/", diff --git a/src/poli/core/util/proteins/rasp/inner_rasp/visualization.py b/src/poli/core/util/proteins/rasp/inner_rasp/visualization.py index 6478dd3c..9ccd51bc 100644 --- a/src/poli/core/util/proteins/rasp/inner_rasp/visualization.py +++ b/src/poli/core/util/proteins/rasp/inner_rasp/visualization.py @@ -6,7 +6,6 @@ import os import pickle -import Bio.PDB.Polypeptide import matplotlib.pyplot as plt import numpy as np import pandas as pd diff --git a/src/poli/core/util/seeding/__init__.py b/src/poli/core/util/seeding/__init__.py index e15ab2c8..d1a4def0 100644 --- a/src/poli/core/util/seeding/__init__.py +++ b/src/poli/core/util/seeding/__init__.py @@ -1,3 
+1,5 @@ """Utilities for seeding random number generators.""" from .seeding import seed_numpy, seed_python, seed_python_numpy_and_torch, seed_torch + +__all__ = ["seed_numpy", "seed_python", "seed_python_numpy_and_torch", "seed_torch"] diff --git a/src/poli/objective_factory.py b/src/poli/objective_factory.py index 2777c403..9e20f985 100644 --- a/src/poli/objective_factory.py +++ b/src/poli/objective_factory.py @@ -5,7 +5,7 @@ import configparser import logging from pathlib import Path -from typing import Any, Tuple +from typing import Tuple import numpy as np @@ -235,7 +235,7 @@ def __register_objective_if_available( if answer == "y": # Register problem - logging.debug(f"poli 🧪: Registered the objective from the repository.") + logging.debug("poli 🧪: Registered the objective from the repository.") register_problem_from_repository(name, quiet=quiet) # Refresh the config config = load_config() @@ -449,7 +449,7 @@ def _instantiate_observer(observer_name: str, quiet: bool = False) -> AbstractOb observer = DefaultObserver() else: if not quiet: - print(f"poli 🧪: initializing the observer.") + print("poli 🧪: initializing the observer.") try: f = open(observer_script, "r") observer_class = ( @@ -457,8 +457,8 @@ def _instantiate_observer(observer_name: str, quiet: bool = False) -> AbstractOb ) f.close() observer = dynamically_instantiate(observer_class) - except: + except Exception: if not quiet: - print(f"poli 🧪: attempting isolated observer instantiation.") + print("poli 🧪: attempting isolated observer instantiation.") observer = ExternalObserver(observer_name=observer_name) return observer diff --git a/src/poli/objective_repository/aloha/register.py b/src/poli/objective_repository/aloha/register.py index 12e56b8c..76ca58bd 100644 --- a/src/poli/objective_repository/aloha/register.py +++ b/src/poli/objective_repository/aloha/register.py @@ -11,7 +11,6 @@ """ from string import ascii_uppercase -from typing import Literal, Tuple import numpy as np diff --git 
a/src/poli/objective_repository/dockstring/isolated_function.py b/src/poli/objective_repository/dockstring/isolated_function.py index 7e4e695f..93ed2ef5 100644 --- a/src/poli/objective_repository/dockstring/isolated_function.py +++ b/src/poli/objective_repository/dockstring/isolated_function.py @@ -1,16 +1,11 @@ -from typing import Literal, Tuple +from typing import Literal import numpy as np from dockstring import load_target -from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_isolated_function import AbstractIsolatedFunction -from poli.core.black_box_information import BlackBoxInformation from poli.core.registry import register_isolated_function from poli.core.util.chemistry.string_to_molecule import translate_selfies_to_smiles -from poli.objective_repository.dockstring.information import ( - dockstring_black_box_information, -) class IsolatedDockstringFunction(AbstractIsolatedFunction): @@ -127,7 +122,7 @@ def __call__(self, x: np.ndarray, context=None) -> np.ndarray: for smiles in molecules_as_smiles: try: score = self.target.dock(smiles)[0] - except Exception as e: + except Exception: score = np.nan scores.append(score) diff --git a/src/poli/objective_repository/dockstring/register.py b/src/poli/objective_repository/dockstring/register.py index f62958fe..0980cc27 100644 --- a/src/poli/objective_repository/dockstring/register.py +++ b/src/poli/objective_repository/dockstring/register.py @@ -24,10 +24,7 @@ from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem -from poli.core.util.chemistry.string_to_molecule import ( - translate_selfies_to_smiles, - translate_smiles_to_selfies, -) +from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies from poli.core.util.isolation.instancing import get_inner_function from poli.core.util.seeding import seed_python_numpy_and_torch from 
poli.objective_repository.dockstring.information import ( diff --git a/src/poli/objective_repository/drd3_docking/register.py b/src/poli/objective_repository/drd3_docking/register.py index ce9a2cd8..028264b4 100644 --- a/src/poli/objective_repository/drd3_docking/register.py +++ b/src/poli/objective_repository/drd3_docking/register.py @@ -17,7 +17,6 @@ from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies -from poli.core.util.isolation.instancing import instance_function_as_isolated_process from poli.core.util.seeding import seed_numpy, seed_python from poli.objective_repository.drd3_docking.information import drd3_docking_info diff --git a/src/poli/objective_repository/foldx_rfp_lambo/__init__.py b/src/poli/objective_repository/foldx_rfp_lambo/__init__.py index 6561efb3..a6c6b2ff 100644 --- a/src/poli/objective_repository/foldx_rfp_lambo/__init__.py +++ b/src/poli/objective_repository/foldx_rfp_lambo/__init__.py @@ -23,6 +23,10 @@ ALGORITHM = "ALGORITHM" BATCH_SIZE = "BATCH_SIZE" -from poli.objective_repository.foldx_rfp_lambo.register import ( - FoldXRFPLamboProblemFactory, -) +__all__ = [ + "PROBLEM_SEQ", + "CORRECT_SEQ", + "STARTING_N", + "ALGORITHM", + "BATCH_SIZE", +] diff --git a/src/poli/objective_repository/foldx_rfp_lambo/isolated_function.py b/src/poli/objective_repository/foldx_rfp_lambo/isolated_function.py index 9b37e4a2..3279b2ca 100644 --- a/src/poli/objective_repository/foldx_rfp_lambo/isolated_function.py +++ b/src/poli/objective_repository/foldx_rfp_lambo/isolated_function.py @@ -12,7 +12,6 @@ import numpy as np import yaml from lambo import __file__ as project_root_file -from lambo.tasks.proxy_rfp.proxy_rfp import ProxyRFPTask from lambo.utils import AMINO_ACIDS from poli.core.abstract_isolated_function import AbstractIsolatedFunction diff --git a/src/poli/objective_repository/gfp_cbas/cbas_wrapper.py 
b/src/poli/objective_repository/gfp_cbas/cbas_wrapper.py index fe0a8548..fd3af003 100644 --- a/src/poli/objective_repository/gfp_cbas/cbas_wrapper.py +++ b/src/poli/objective_repository/gfp_cbas/cbas_wrapper.py @@ -9,8 +9,6 @@ __author__ = "Simon Bartels, Richard Michael" -import os -import warnings from collections import OrderedDict from pathlib import Path diff --git a/src/poli/objective_repository/gfp_cbas/gfp_gp.py b/src/poli/objective_repository/gfp_cbas/gfp_gp.py index 13d60994..16834bba 100644 --- a/src/poli/objective_repository/gfp_cbas/gfp_gp.py +++ b/src/poli/objective_repository/gfp_cbas/gfp_gp.py @@ -8,7 +8,6 @@ warnings.filterwarnings("ignore") from pathlib import Path -from typing import Tuple import numpy as np diff --git a/src/poli/objective_repository/gfp_cbas/isolated_function.py b/src/poli/objective_repository/gfp_cbas/isolated_function.py index 84a25bf5..dc0c7592 100644 --- a/src/poli/objective_repository/gfp_cbas/isolated_function.py +++ b/src/poli/objective_repository/gfp_cbas/isolated_function.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import List, Literal, Tuple +from typing import Literal from warnings import warn import numpy as np diff --git a/src/poli/objective_repository/gfp_cbas/register.py b/src/poli/objective_repository/gfp_cbas/register.py index 057651e8..30239ba3 100644 --- a/src/poli/objective_repository/gfp_cbas/register.py +++ b/src/poli/objective_repository/gfp_cbas/register.py @@ -1,4 +1,4 @@ -from typing import Literal, Tuple +from typing import Literal from warnings import warn import numpy as np diff --git a/src/poli/objective_repository/gfp_select/isolated_function.py b/src/poli/objective_repository/gfp_select/isolated_function.py index 7781a91f..e9adfe6e 100644 --- a/src/poli/objective_repository/gfp_select/isolated_function.py +++ b/src/poli/objective_repository/gfp_select/isolated_function.py @@ -1,15 +1,9 @@ from pathlib import Path -from typing import Tuple import numpy as np import pandas as pd -from 
poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_isolated_function import AbstractIsolatedFunction -from poli.core.abstract_problem_factory import AbstractProblemFactory -from poli.core.problem_setup_information import ProblemSetupInformation -from poli.core.util.proteins.defaults import AMINO_ACIDS -from poli.core.util.seeding import seed_numpy, seed_python class GFPSelectIsolatedLogic(AbstractIsolatedFunction): diff --git a/src/poli/objective_repository/gfp_select/register.py b/src/poli/objective_repository/gfp_select/register.py index 8d416289..7bd51a35 100644 --- a/src/poli/objective_repository/gfp_select/register.py +++ b/src/poli/objective_repository/gfp_select/register.py @@ -1,5 +1,3 @@ -from typing import Tuple - import numpy as np from poli.core.abstract_black_box import AbstractBlackBox diff --git a/src/poli/objective_repository/penalized_logp_lambo/isolated_function.py b/src/poli/objective_repository/penalized_logp_lambo/isolated_function.py index 4bee9f4f..020d768b 100644 --- a/src/poli/objective_repository/penalized_logp_lambo/isolated_function.py +++ b/src/poli/objective_repository/penalized_logp_lambo/isolated_function.py @@ -85,7 +85,7 @@ def __call__(self, x: np.ndarray, context: dict = None): and then computes the penalized logP. If the translation threw an error, we return NaN instead. 
""" - if not x.dtype.kind in ["U", "S"]: + if x.dtype.kind not in ["U", "S"]: raise ValueError( f"We expect x to be an array of strings, but we got {x.dtype}" ) diff --git a/src/poli/objective_repository/rasp/register.py b/src/poli/objective_repository/rasp/register.py index fa918f61..ceac32e2 100644 --- a/src/poli/objective_repository/rasp/register.py +++ b/src/poli/objective_repository/rasp/register.py @@ -20,8 +20,6 @@ from pathlib import Path from typing import List, Union -import numpy as np - from poli.core.abstract_black_box import AbstractBlackBox from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation diff --git a/src/poli/objective_repository/rdkit_logp/register.py b/src/poli/objective_repository/rdkit_logp/register.py index d26053fb..c923448c 100644 --- a/src/poli/objective_repository/rdkit_logp/register.py +++ b/src/poli/objective_repository/rdkit_logp/register.py @@ -9,7 +9,7 @@ strings. """ -from typing import Literal, Tuple +from typing import Literal import numpy as np from rdkit.Chem import Descriptors diff --git a/src/poli/objective_repository/rdkit_qed/register.py b/src/poli/objective_repository/rdkit_qed/register.py index a4c4a183..33e3bedb 100644 --- a/src/poli/objective_repository/rdkit_qed/register.py +++ b/src/poli/objective_repository/rdkit_qed/register.py @@ -10,7 +10,7 @@ 0 and 1. We allow for both SMILES and SELFIES strings. 
""" -from typing import List, Literal, Tuple +from typing import Literal import numpy as np from rdkit.Chem.QED import qed @@ -19,7 +19,6 @@ from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem -from poli.core.problem_setup_information import ProblemSetupInformation from poli.core.util.chemistry.string_to_molecule import strings_to_molecules from poli.core.util.seeding import seed_python_numpy_and_torch from poli.objective_repository.rdkit_qed.information import rdkit_qed_info diff --git a/src/poli/objective_repository/rmf_landscape/isolated_function.py b/src/poli/objective_repository/rmf_landscape/isolated_function.py index 6ec63d1d..6d602f22 100644 --- a/src/poli/objective_repository/rmf_landscape/isolated_function.py +++ b/src/poli/objective_repository/rmf_landscape/isolated_function.py @@ -1,11 +1,9 @@ from __future__ import annotations import logging -from random import seed -from typing import List, Optional +from typing import List import numpy as np -from scipy.spatial.distance import hamming from scipy.stats import genpareto from poli.core.abstract_isolated_function import AbstractIsolatedFunction @@ -52,7 +50,6 @@ def __init__( "Missing reference input sequence. " "Did you forget to pass it to the create of the black box?" 
) - oracle_name = "RMF" if not isinstance(wildtype, np.ndarray): wildtype = np.array(list(wildtype)) self.wildtype = wildtype @@ -92,7 +89,6 @@ def f( kappa: float, rand_state, ) -> float: - L = len(sigma) # from [1] (2) additive term via Hamming distance and constant # hamm_dist = hamming(sigma.flatten(), sigma_star.flatten()) # NOTE scipy HD is normalized, DON't USE hamm_dist = np.sum(sigma != sigma_star) diff --git a/src/poli/objective_repository/rmf_landscape/register.py b/src/poli/objective_repository/rmf_landscape/register.py index 8c5c6e34..8add28d4 100644 --- a/src/poli/objective_repository/rmf_landscape/register.py +++ b/src/poli/objective_repository/rmf_landscape/register.py @@ -4,14 +4,14 @@ References ---------- [1] Adaptation in Tunably Rugged Fitness Landscapes: The Rough Mount Fuji Model. - Neidhart J., Szendro I.G., and Krug, J. Genetics 198, 699-721 (2014). https://doi.org/10.1534/genetics.114.167668 + Neidhart J., Szendro I.G., and Krug, J. Genetics 198, 699-721 (2014). https://doi.org/10.1534/genetics.114.167668 [2] Analysis of a local fitness landscape with a model of the rough Mt. Fuji-type landscape: Application to prolyl endopeptidase and thermolysin. - Aita T., Uchiyama H., et al. Biopolymers 54, 64-79 (2000). https://doi.org/10.1002/(SICI)1097-0282(200007)54:1<64::AID-BIP70>3.0.CO;2-R + Aita T., Uchiyama H., et al. Biopolymers 54, 64-79 (2000). 
https://doi.org/10.1002/(SICI)1097-0282(200007)54:1<64::AID-BIP70>3.0.CO;2-R """ from __future__ import annotations -from typing import List, Optional, Union +from typing import List import numpy as np @@ -19,10 +19,7 @@ from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem -from poli.core.util.isolation.instancing import ( - get_inner_function, - instance_function_as_isolated_process, -) +from poli.core.util.isolation.instancing import get_inner_function from poli.core.util.seeding import seed_python_numpy_and_torch from poli.objective_repository.rmf_landscape.information import rmf_info @@ -102,7 +99,7 @@ def __init__( self.alphabet = alphabet self.seed = seed self.force_isolation = force_isolation - inner_function = get_inner_function( # NOTE: this implicitly registers + _ = get_inner_function( # NOTE: this implicitly registers isolated_function_name="rmf_landscape__isolated", class_name="RMFIsolatedLogic", module_to_import="poli.objective_repository.rmf_landscape.isolated_function", diff --git a/src/poli/objective_repository/sa_tdc/register.py b/src/poli/objective_repository/sa_tdc/register.py index 2891c4e2..7ed82a39 100644 --- a/src/poli/objective_repository/sa_tdc/register.py +++ b/src/poli/objective_repository/sa_tdc/register.py @@ -17,7 +17,6 @@ from poli.core.chemistry.tdc_black_box import TDCBlackBox from poli.core.problem import Problem from poli.core.util.chemistry.string_to_molecule import translate_smiles_to_selfies -from poli.core.util.isolation.instancing import instance_function_as_isolated_process from poli.core.util.seeding import seed_python_numpy_and_torch from poli.objective_repository.sa_tdc.information import sa_tdc_info diff --git a/src/poli/objective_repository/super_mario_bros/register.py b/src/poli/objective_repository/super_mario_bros/register.py index 0d7cec22..ae3a4bd8 100644 --- 
a/src/poli/objective_repository/super_mario_bros/register.py +++ b/src/poli/objective_repository/super_mario_bros/register.py @@ -8,7 +8,6 @@ """ from pathlib import Path -from typing import Tuple import numpy as np diff --git a/src/poli/objective_repository/toy_continuous_problem/register.py b/src/poli/objective_repository/toy_continuous_problem/register.py index e021519c..39e4e3b8 100644 --- a/src/poli/objective_repository/toy_continuous_problem/register.py +++ b/src/poli/objective_repository/toy_continuous_problem/register.py @@ -25,12 +25,7 @@ toy_continuous_info, ) -from .toy_continuous_problem import ( - POSSIBLE_FUNCTIONS, - SIX_DIMENSIONAL_PROBLEMS, - TWO_DIMENSIONAL_PROBLEMS, - ToyContinuousProblem, -) +from .toy_continuous_problem import POSSIBLE_FUNCTIONS, ToyContinuousProblem class ToyContinuousBlackBox(AbstractBlackBox): diff --git a/src/poli/objective_repository/white_noise/register.py b/src/poli/objective_repository/white_noise/register.py index 612d7eeb..3efcd2ab 100644 --- a/src/poli/objective_repository/white_noise/register.py +++ b/src/poli/objective_repository/white_noise/register.py @@ -15,7 +15,6 @@ from poli.core.abstract_problem_factory import AbstractProblemFactory from poli.core.black_box_information import BlackBoxInformation from poli.core.problem import Problem -from poli.core.problem_setup_information import ProblemSetupInformation from poli.core.util.seeding import seed_python_numpy_and_torch @@ -161,7 +160,6 @@ def create( if seed is not None: seed_python_numpy_and_torch(seed) - problem_info = self.get_setup_information() f = WhiteNoiseBlackBox( batch_size=batch_size, parallelize=parallelize, diff --git a/src/poli/repository.py b/src/poli/repository.py index d8e11a3b..c2e51b3b 100644 --- a/src/poli/repository.py +++ b/src/poli/repository.py @@ -1 +1 @@ -from .objective_repository import * +from .objective_repository import * # noqa F403 diff --git a/src/poli/tests/benchmarks/test_benchmark_creation.py 
b/src/poli/tests/benchmarks/test_benchmark_creation.py index 5f10d78b..3e67ef5f 100644 --- a/src/poli/tests/benchmarks/test_benchmark_creation.py +++ b/src/poli/tests/benchmarks/test_benchmark_creation.py @@ -36,7 +36,7 @@ def test_creating_guacamol_benchmark(): benchmark = GuacaMolGoalDirectedBenchmark(string_representation="SELFIES") for problem in benchmark: - f, x0 = problem.black_box, problem.x0 + _, _ = problem.black_box, problem.x0 # Break after the first iteration # for CI efficiency @@ -50,7 +50,7 @@ def test_creating_pmo_benchmark(): benchmark = PMOBenchmark(string_representation="SELFIES") for problem in benchmark: - f, x0 = problem.black_box, problem.x0 + _, _ = problem.black_box, problem.x0 # Break after the first iteration # for CI efficiency. The creation of all diff --git a/src/poli/tests/registry/basic_objectives/test_basic_objectives.py b/src/poli/tests/registry/basic_objectives/test_basic_objectives.py index 423fb5bd..fcc0302f 100644 --- a/src/poli/tests/registry/basic_objectives/test_basic_objectives.py +++ b/src/poli/tests/registry/basic_objectives/test_basic_objectives.py @@ -6,7 +6,7 @@ def test_registering_white_noise(): white_noise_problem = objective_factory.create(name="white_noise") f, x0 = white_noise_problem.black_box, white_noise_problem.x0 - y0 = f(x0) + _ = f(x0) f.terminate() diff --git a/src/poli/tests/registry/chemistry/test_chemistry_objectives.py b/src/poli/tests/registry/chemistry/test_chemistry_objectives.py index 6709ab1b..c861f3a2 100644 --- a/src/poli/tests/registry/chemistry/test_chemistry_objectives.py +++ b/src/poli/tests/registry/chemistry/test_chemistry_objectives.py @@ -73,12 +73,11 @@ def test_penalized_logp_lambo(): Testing whether we can register the logp problem from lambo. 
""" - from poli import objective_factory _ = pytest.importorskip("lambo") # Using create - problem = objective_factory.create(name="penalized_logp_lambo", force_register=True) + _ = objective_factory.create(name="penalized_logp_lambo") @pytest.mark.poli__dockstring @@ -86,7 +85,6 @@ def test_querying_dockstring_using_smiles(): """ In this test, we force-register and query dockstring. """ - from poli import objective_factory problem = objective_factory.create( name="dockstring", @@ -98,7 +96,7 @@ def test_querying_dockstring_using_smiles(): # Docking another smiles x1 = np.array([list("CC(=O)OC1=CC=CC=C1C(=O)O")]) - y1 = f(x1) + _ = f(x1) f.terminate() @@ -109,7 +107,6 @@ def test_querying_dockstring_using_selfies(): In this test, we check whether dockstring still works when using SELFIES instead of SMILES. """ - from poli import objective_factory problem = objective_factory.create( name="dockstring", @@ -146,7 +143,7 @@ def test_querying_dockstring_using_selfies(): ] ) - y1 = f(selfies_aspirin) + _ = f(selfies_aspirin) f.terminate() diff --git a/src/poli/tests/registry/proteins/test_rasp.py b/src/poli/tests/registry/proteins/test_rasp.py index b9235e00..aec5edce 100644 --- a/src/poli/tests/registry/proteins/test_rasp.py +++ b/src/poli/tests/registry/proteins/test_rasp.py @@ -10,11 +10,6 @@ @pytest.mark.poli__rasp def test_rasp_on_3ned_against_notebooks_results_on_rasp_env(): - try: - from poli.objective_repository.rasp.isolated_function import RaspIsolatedLogic - except ImportError: - pytest.skip("Could not import the rasp isolated logic. 
") - import torch # For us to match what the notebook says, we have diff --git a/src/poli/tests/registry/proteins/test_rmf.py b/src/poli/tests/registry/proteins/test_rmf.py index 28440054..e776071a 100644 --- a/src/poli/tests/registry/proteins/test_rmf.py +++ b/src/poli/tests/registry/proteins/test_rmf.py @@ -2,7 +2,6 @@ import pytest from poli import objective_factory -from poli.objective_repository import AVAILABLE_PROBLEM_FACTORIES ref_aa_seq = "HPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWNPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW" @@ -45,7 +44,7 @@ def test_rmf_landscape_batch_eval(): ) N = 10 f, x0 = problem.black_box, problem.x0 - y0 = f(x0) + _ = f(x0) x_t = [] seq_b = x0.copy() seq_b[0, 1] = "Y" diff --git a/src/poli/tests/registry/test_basic_loop_without_create.py b/src/poli/tests/registry/test_basic_loop_without_create.py index fe5595c1..13e408c2 100644 --- a/src/poli/tests/registry/test_basic_loop_without_create.py +++ b/src/poli/tests/registry/test_basic_loop_without_create.py @@ -11,10 +11,10 @@ def test_basic_loop_without_create(): problem_factory = ToyContinuousProblemFactory() - problem = problem_factory.create(function_name="ackley_function_01") + problem = problem_factory.create(function_name="ackley_function_01") # noqa F841 def test_instancing_black_boxes_alone(): from poli.objective_repository import ToyContinuousBlackBox - f = ToyContinuousBlackBox(function_name="ackley_function_01") + f = ToyContinuousBlackBox(function_name="ackley_function_01") # noqa F841 diff --git a/src/poli/tests/registry/toy_continuous_problems/test_instancing_of_toy_continuous_problems.py b/src/poli/tests/registry/toy_continuous_problems/test_instancing_of_toy_continuous_problems.py index 7d99e498..e38a949d 100644 --- 
a/src/poli/tests/registry/toy_continuous_problems/test_instancing_of_toy_continuous_problems.py +++ b/src/poli/tests/registry/toy_continuous_problems/test_instancing_of_toy_continuous_problems.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from poli.objective_repository.toy_continuous_problem.register import ( +from poli.objective_repository.toy_continuous_problem.toy_continuous_problem import ( POSSIBLE_FUNCTIONS, SIX_DIMENSIONAL_PROBLEMS, TWO_DIMENSIONAL_PROBLEMS, diff --git a/src/poli/tests/registry/toy_discrete_problems/test_ehrlich.py b/src/poli/tests/registry/toy_discrete_problems/test_ehrlich.py index 7c5632dc..c2d455bc 100644 --- a/src/poli/tests/registry/toy_discrete_problems/test_ehrlich.py +++ b/src/poli/tests/registry/toy_discrete_problems/test_ehrlich.py @@ -147,7 +147,7 @@ def test_creating_a_problem_with_a_factory(): ) f, x0 = problem.black_box, problem.x0 - y0 = f(x0) + _ = f(x0) def test_creating_with_create(): @@ -162,4 +162,4 @@ def test_creating_with_create(): seed=1, ) f, x0 = problem.black_box, problem.x0 - y0 = f(x0) + _ = f(x0) diff --git a/src/poli/tests/test_core_promises.py b/src/poli/tests/test_core_promises.py index 2c0923de..35e92d3f 100644 --- a/src/poli/tests/test_core_promises.py +++ b/src/poli/tests/test_core_promises.py @@ -1,7 +1,6 @@ """This test suite contains the core promises we make to the user.""" import numpy as np -import pytest def test_creating_an_instance_of_a_black_box(): @@ -9,7 +8,7 @@ def test_creating_an_instance_of_a_black_box(): f = WhiteNoiseBlackBox() x = np.array([["1", "2", "3"]]) - y = f(x) + _ = f(x) def test_creating_a_problem(): @@ -22,6 +21,6 @@ def test_creating_a_problem(): ) f, x0 = white_noise_problem.black_box, white_noise_problem.x0 - y0 = f(x0) + _ = f(x0) f.terminate() diff --git a/src/poli/tests/util/test_foldx_interface.py b/src/poli/tests/util/test_foldx_interface.py index efcb4fdb..f2dd0849 100644 --- a/src/poli/tests/util/test_foldx_interface.py +++ 
b/src/poli/tests/util/test_foldx_interface.py @@ -17,7 +17,9 @@ pytest.skip("Could not import the foldx interface. ", allow_module_level=True) try: - from poli.core.util.proteins.pdb_parsing import parse_pdb_as_residue_strings + from poli.core.util.proteins.pdb_parsing import ( # noqa F401 + parse_pdb_as_residue_strings, + ) except ImportError: pytest.skip( "Could not import the protein utilities for parsing. ", allow_module_level=True diff --git a/src/poli/tests/util/test_protein_utilities.py b/src/poli/tests/util/test_protein_utilities.py index 3f9f5616..0cb28260 100644 --- a/src/poli/tests/util/test_protein_utilities.py +++ b/src/poli/tests/util/test_protein_utilities.py @@ -23,7 +23,6 @@ try: from poli.core.util.proteins.mutations import ( - edits_between_strings, find_closest_wildtype_pdb_file_to_mutant, mutations_from_wildtype_residues_and_mutant, ) diff --git a/tox.ini b/tox.ini index 3cfab74c..f20b1aba 100644 --- a/tox.ini +++ b/tox.ini @@ -25,9 +25,11 @@ description = check the code style with black deps = black isort + ruff commands = black --check --diff . isort --profile black --check-only src/ + ruff check [testenv:poli-base-py39] description = run the tests with pytest on the base environment for poli