Runs the ruff linter

MachineLearningLifeScience · Jul 25, 2024 · ede25b2
1 parent 134acc0
commit ede25b2
Show file tree

Hide file tree

Showing 50 changed files with 99 additions and 146 deletions.
diff --git a/examples/observers/adding_a_wandb_observer/example_logging_rdkit_qed_using_wandb.py b/examples/observers/adding_a_wandb_observer/example_logging_rdkit_qed_using_wandb.py
@@ -11,7 +11,6 @@
 import numpy as np
 from wandb_observer import WandbObserver
 
-from poli.core.problem import Problem
 from poli.objective_repository import QEDProblemFactory
 
 THIS_DIR = Path(__file__).parent.resolve()

diff --git a/examples/observers/adding_an_mlflow_observer/mlflow_observer.py b/examples/observers/adding_an_mlflow_observer/mlflow_observer.py
@@ -61,8 +61,6 @@ def initialize_observer(
             }
         )
 
-        mlflow.log_param("x0", x0)
-        mlflow.log_param("y0", y0)
         mlflow.log_param("seed", seed)
 
     def observe(self, x: np.ndarray, y: np.ndarray, context=None) -> None:

diff --git a/pyproject.toml b/pyproject.toml
@@ -48,3 +48,6 @@ markers = [
 
 [tool.isort]
 profile = "black"
+
+[tool.ruff]
+exclude = ["src/poli/core/util/proteins/rasp/inner_rasp", "src/poli/objective_repository/gfp_cbas"]
diff --git a/src/poli/__init__.py b/src/poli/__init__.py
@@ -6,3 +6,5 @@
 # from .core import get_problems
 from .objective_factory import create
 from .objective_repository import get_problems
+
+__all__ = ["create", "get_problems", "instance_function_as_isolated_process"]
diff --git a/src/poli/benchmarks/__init__.py b/src/poli/benchmarks/__init__.py
@@ -5,3 +5,11 @@
     EmbeddedHartmann6D,
     ToyContinuousFunctionsBenchmark,
 )
+
+__all__ = [
+    "GuacaMolGoalDirectedBenchmark",
+    "PMOBenchmark",
+    "ToyContinuousFunctionsBenchmark",
+    "EmbeddedBranin2D",
+    "EmbeddedHartmann6D",
+]
diff --git a/src/poli/core/abstract_black_box.py b/src/poli/core/abstract_black_box.py
@@ -9,7 +9,6 @@
 
 from poli.core.black_box_information import BlackBoxInformation
 from poli.core.exceptions import BudgetExhaustedException
-from poli.core.problem_setup_information import ProblemSetupInformation
 from poli.core.util.abstract_observer import AbstractObserver
 from poli.core.util.alignment import is_aligned_input
 from poli.core.util.batch import batched

diff --git a/src/poli/core/chemistry/tdc_isolated_function.py b/src/poli/core/chemistry/tdc_isolated_function.py
@@ -82,7 +82,7 @@ def __call__(self, x, context=None):
         scores : array-like
             An array of oracle scores computed for each input molecule.
         """
-        if not x.dtype.kind in ["U", "S"]:
+        if x.dtype.kind not in ["U", "S"]:
             raise ValueError(
                 f"We expect x to be an array of strings, but we got {x.dtype}"
             )

diff --git a/src/poli/core/multi_objective_black_box.py b/src/poli/core/multi_objective_black_box.py
@@ -10,7 +10,6 @@
 
 from poli.core.abstract_black_box import AbstractBlackBox
 from poli.core.black_box_information import BlackBoxInformation
-from poli.core.problem_setup_information import ProblemSetupInformation
 
 
 class MultiObjectiveBlackBox(AbstractBlackBox):

diff --git a/src/poli/core/problem_setup_information.py b/src/poli/core/problem_setup_information.py
@@ -2,8 +2,6 @@
 Implements the problem setup information, which contains the problem information (e.g. alphabet, sequence length...).
 """
 
-import numpy as np
-
 from poli.core.black_box_information import BlackBoxInformation
 
 

diff --git a/src/poli/core/proteins/foldx_isolated_function.py b/src/poli/core/proteins/foldx_isolated_function.py
@@ -1,4 +1,3 @@
-from multiprocessing import cpu_count
 from pathlib import Path
 from time import time
 from typing import List, Union
@@ -7,7 +6,6 @@
 import numpy as np
 
 from poli.core.abstract_isolated_function import AbstractIsolatedFunction
-from poli.core.problem_setup_information import ProblemSetupInformation
 from poli.core.util.proteins.foldx import FoldxInterface
 from poli.core.util.proteins.pdb_parsing import (
     parse_pdb_as_residue_strings,

diff --git a/src/poli/core/util/alignment/__init__.py b/src/poli/core/util/alignment/__init__.py
@@ -1 +1,3 @@
 from .is_aligned import is_aligned_input
+
+__all__ = ["is_aligned_input"]
diff --git a/src/poli/core/util/batch/__init__.py b/src/poli/core/util/batch/__init__.py
@@ -1,3 +1,5 @@
 """Utility functions for batching data."""
 
 from .batch_input import batched
+
+__all__ = ["batched"]
diff --git a/src/poli/core/util/chemistry/string_to_molecule.py b/src/poli/core/util/chemistry/string_to_molecule.py
@@ -40,7 +40,7 @@ def translate_smiles_to_selfies(
             selfies_strings.append(sf.encoder(smile))
         except sf.EncoderError:
             if strict:
-                raise ValueError(f"Failed to encode SMILES to SELFIES.")
+                raise ValueError("Failed to encode SMILES to SELFIES.")
             else:
                 selfies_strings.append(None)
 
@@ -78,7 +78,7 @@ def translate_selfies_to_smiles(
             smiles_strings.append(sf.decoder(selfies))
         except sf.DecoderError:
             if strict:
-                raise ValueError(f"Failed to decode SELFIES to SMILES.")
+                raise ValueError("Failed to decode SELFIES to SMILES.")
             else:
                 smiles_strings.append(None)
 

diff --git a/src/poli/core/util/files/__init__.py b/src/poli/core/util/files/__init__.py
@@ -1,3 +1,5 @@
 """Utilities for downloading files from GitHub repositories."""
 
 from .download_files_from_github import download_file_from_github_repository
+
+__all__ = ["download_file_from_github_repository"]
diff --git a/src/poli/core/util/inter_process_communication/process_wrapper.py b/src/poli/core/util/inter_process_communication/process_wrapper.py
@@ -47,14 +47,14 @@ def get_connection(port: int, password: str) -> Client:
             # if we manage to establish a connection we exit the function
             return Client(address, authkey=password.encode())
         # maybe the host process isn't ready yet
-        except EOFError as e:
+        except EOFError:
             pass
-        except ConnectionRefusedError as e:
+        except ConnectionRefusedError:
             pass
         retries -= 1
     # when we get here, e must have been instantiated
     logging.fatal("Could not connect to host process.")
-    raise e
+    raise ConnectionError("Could not connect to host process.")
 
 
 class ProcessWrapper:

diff --git a/src/poli/core/util/isolation/instancing.py b/src/poli/core/util/isolation/instancing.py
@@ -225,13 +225,13 @@ def register_isolated_function(name: str, quiet: bool = False):
         # the black box and the problem factory.
         if name == "tdc__isolated":
             logging.debug(
-                f"poli 🧪: Registered the isolated function from the repository."
+                "poli 🧪: Registered the isolated function from the repository."
             )
             __register_isolated_function_from_core(name, quiet=quiet)
             config = load_config()
         else:
             logging.debug(
-                f"poli 🧪: Registered the isolated function from the repository."
+                "poli 🧪: Registered the isolated function from the repository."
             )
             __register_isolated_function_from_repository(name, quiet=quiet)
             # Refresh the config

diff --git a/src/poli/core/util/proteins/rasp/__init__.py b/src/poli/core/util/proteins/rasp/__init__.py
@@ -2,3 +2,5 @@
 
 from .load_models import load_cavity_and_downstream_models
 from .rasp_interface import RaspInterface
+
+__all__ = ["load_cavity_and_downstream_models", "RaspInterface"]
diff --git a/src/poli/core/util/proteins/rasp/inner_rasp/PrismData.py b/src/poli/core/util/proteins/rasp/inner_rasp/PrismData.py
@@ -23,7 +23,7 @@
 import numpy as np
 import pandas as pd
 import yaml
-from Bio import Seq, SeqIO, SeqRecord, SubsMat, pairwise2
+from Bio import Seq, SeqIO, SeqRecord, pairwise2
 from Bio.SubsMat import MatrixInfo
 
 
@@ -224,24 +224,24 @@ def read_header(self, filename, verbose=0):
 
     def check_header(self, header):
         """Check a header for fields required by all data files"""
-        if not "version" in header.keys():
+        if "version" not in header.keys():
             raise PrismFormatError("Header has no 'version' field")
-        if not "protein" in header.keys():
+        if "protein" not in header.keys():
             raise PrismFormatError("Header has no 'protein' field")
-        if not "name" in header["protein"].keys():
+        if "name" not in header["protein"].keys():
             raise PrismFormatError("Header has no 'protein: name' field")
-        if not "sequence" in header["protein"].keys():
+        if "sequence" not in header["protein"].keys():
             raise PrismFormatError("Header has no 'protein: sequence' field")
-        if not "uniprot" in header["protein"].keys():
+        if "uniprot" not in header["protein"].keys():
             raise PrismFormatError("Header has no 'protein: uniprot' field")
         if "first_residue_number" in header["protein"].keys():
             if int(header["protein"]["first_residue_number"]) < 0:
                 raise PrismFormatError("First residue number must be non-negative")
-        if not "columns" in header.keys():
+        if "columns" not in header.keys():
             raise PrismFormatError("Header has no 'columns' field")
         if "filename" in header.keys():
             data_type = header["filename"].split("_")[1]
-            if not data_type.lower() in header.keys():
+            if data_type.lower() not in header.keys():
                 raise PrismFormatError(
                     "Header has no '%s' field but filename indicates this data type"
                     % (data_type)
@@ -280,9 +280,9 @@ def __merge_header_fields(self, header_list, mode="union"):
         def update_keys(key_dic, dic):
             """Update key_dic with keys from dic recursively"""
             for key in dic.keys():
-                if not key in key_dic.keys():
+                if key not in key_dic.keys():
                     key_dic[key] = {}
-                if type(dic[key]) == dict:
+                if isinstance(dic[key], dict):
                     update_keys(key_dic[key], dic[key])
 
         # Read all header keys
@@ -330,7 +330,7 @@ def __dump_header_csv(self, filename, header_list):
             for header in header_list:
                 row = []
                 for key in common_header_keys.keys():
-                    if not key in header:
+                    if key not in header:
                         row += [""] * np.max([1, len(common_header_keys[key])])
                     elif len(common_header_keys[key].keys()) == 0:
                         row += [header[key]]
@@ -405,7 +405,7 @@ def seq_from_data(self):
         Data is assumed to have index columns resi and aa_ref
         """
 
-        if not "aa_ref" in self.dataframe.columns:
+        if "aa_ref" not in self.dataframe.columns:
             self.add_index_columns()
 
         n_res = 0
@@ -520,12 +520,12 @@ def check_column_names(self, verbose=0):
             1:
         ]  # first column is data specific and not in header
         for cn in data_colnames:
-            if not cn in meta_colnames:
+            if cn not in meta_colnames:
                 raise PrismFormatError(
                     "Could not find column name '%s' in header" % (cn)
                 )
         for cn in meta_colnames:
-            if not cn in data_colnames:
+            if cn not in data_colnames:
                 raise PrismFormatError(
                     "Could not find header column name '%s' in data" % (cn)
                 )
@@ -616,7 +616,7 @@ def get_var_into_aa(self, aa, multimutant_mode="any"):
             - exclude : Only return single mutants
         """
         # Check argument
-        if not multimutant_mode in ["any", "all", "exclude"]:
+        if multimutant_mode not in ["any", "all", "exclude"]:
             raise ValueError(
                 "Function get_var_into_aa argument multimutant_mode must be 'any', 'all' or 'exclude'"
             )
@@ -649,7 +649,7 @@ def get_var_from_aa(self, aa, multimutant_mode="any"):
             - exclude : Only return single mutants
         """
         # Check argument
-        if not multimutant_mode in ["any", "all", "exclude"]:
+        if multimutant_mode not in ["any", "all", "exclude"]:
             raise ValueError(
                 "Function get_var_from_aa argument multimutant_mode must be 'any', 'all' or 'exclude'"
             )
@@ -682,7 +682,7 @@ def get_var_at_pos(self, target_resi, mode="any"):
             - exact : Substitutions at all given position and no others
         """
         # Check argument mode
-        if not mode in ["any", "all", "exact"]:
+        if mode not in ["any", "all", "exact"]:
             raise ValueError(
                 "Function get_var_from_aa argument mode must be 'any', 'all' or 'exact'"
             )
@@ -933,7 +933,7 @@ def strip_all(s):
             # Variant width
             if "width" in self.metadata["variants"].keys():
                 if recalc_variants["width"] == "single mutants":
-                    if not strip_all(self.metadata["variants"]["width"]) in [
+                    if strip_all(self.metadata["variants"]["width"]) not in [
                         "singlemutants",
                         "singlemutant",
                         "singlemut",
@@ -949,7 +949,7 @@ def strip_all(s):
                         elif verbose > 0:
                             print("WARNING: " + s)
                 elif recalc_variants["width"] == "multi mutants":
-                    if not strip_all(self.metadata["variants"]["width"]) in [
+                    if strip_all(self.metadata["variants"]["width"]) not in [
                         "multimutants",
                         "multimutant",
                         "multimut",
@@ -965,7 +965,7 @@ def strip_all(s):
                         elif verbose > 0:
                             print("WARNING: " + s)
                 elif recalc_variants["width"] == "single and double mutants":
-                    if not strip_all(self.metadata["variants"]["width"]) in [
+                    if strip_all(self.metadata["variants"]["width"]) not in [
                         "singleanddoublemutants",
                         "singleanddoublemutant",
                         "singleanddouble",
@@ -1270,7 +1270,7 @@ def to_new_reference(
         resi_rm = []
         resi_shift = np.full(n_res_data, resi_shift_init)
         aa_change = {}
-        if not target_seq is None:
+        if target_seq is not None:
             if not PrismParser.is_aa_one_nat(None, target_seq, "X"):
                 raise ValueError(
                     "Argument target_seq to VariantData.to_new_reference must be a single-letter amino acid string (or None)"
@@ -1406,7 +1406,7 @@ def to_new_reference(
                     )
                 )
 
-        if not first_resn is None:
+        if first_resn is not None:
             self.metadata["protein"]["first_residue_number"] = first_resn
         assert int(self.metadata["protein"]["first_residue_number"]) == first_resn
 
@@ -1522,7 +1522,7 @@ def merge(
         **kwargs : keyword arguments
             Passed to to_new_reference function
         """
-        if not merge in ["left", "outer", "inner"]:
+        if merge not in ["left", "outer", "inner"]:
             raise ValueError("Allowed merge arguments are left, outer or inner")
 
         merged_data = self.copy()
@@ -1536,7 +1536,7 @@ def merge(
         if target_seq is None:
             # Make from meta data, variant residue numbers will match the index of this
             target_seq = self.metadata["protein"]["sequence"]
-            if not first_resn is None:
+            if first_resn is not None:
                 raise ValueError(
                     "merge argument first_resn can only be set if target_seq != None\n"
                     + "Use VariantData.to_new_reference to only shift residue numbering"
@@ -1815,7 +1815,7 @@ def merge(
         ):
             record = None
             for r in SeqIO.parse(args.target_seq, "fasta"):
-                if not record is None:
+                if record is not None:
                     # if args.verbose > 0:
                     print(
                         "WARNING: Only using the first sequence record in %s"

diff --git a/src/poli/core/util/proteins/rasp/inner_rasp/cavity_model.py b/src/poli/core/util/proteins/rasp/inner_rasp/cavity_model.py
@@ -1,12 +1,8 @@
-import glob
 import os
-import random
-from typing import Callable, List, Union
 
 import numpy as np
-import pandas as pd
 import torch
-from torch.utils.data import DataLoader, Dataset
+from torch.utils.data import Dataset
 
 __all__ = [
     "ResidueEnvironment",
diff --git a/src/poli/core/problem_setup_information.py b/src/poli/core/problem_setup_information.py
@@ -2,8 +2,6 @@
 Implements the problem setup information, which contains the problem information (e.g. alphabet, sequence length...).
 """
 
-import numpy as np
-
 from poli.core.black_box_information import BlackBoxInformation
diff --git a/src/poli/core/util/alignment/__init__.py b/src/poli/core/util/alignment/__init__.py
@@ -1 +1,3 @@
 from .is_aligned import is_aligned_input
+
+__all__ = ["is_aligned_input"]
diff --git a/src/poli/core/util/proteins/rasp/__init__.py b/src/poli/core/util/proteins/rasp/__init__.py
@@ -2,3 +2,5 @@
 
 from .load_models import load_cavity_and_downstream_models
 from .rasp_interface import RaspInterface
+
+__all__ = ["load_cavity_and_downstream_models", "RaspInterface"]