From 341171e5135aefd1ae8c4d6737d0eaacbe3320f7 Mon Sep 17 00:00:00 2001 From: riesben Date: Fri, 13 Sep 2024 17:12:15 +0200 Subject: [PATCH 01/24] adapting to new rdkit fingerprint generators. --- scikit_mol/fingerprints.py | 499 +++++++++++++++++++++++++++++++++++++ 1 file changed, 499 insertions(+) diff --git a/scikit_mol/fingerprints.py b/scikit_mol/fingerprints.py index 767bfc6..ef33aba 100644 --- a/scikit_mol/fingerprints.py +++ b/scikit_mol/fingerprints.py @@ -11,6 +11,11 @@ from rdkit.Chem import rdMHFPFingerprint from rdkit.Avalon import pyAvalonTools +from rdkit.Chem.rdFingerprintGenerator import (GetMorganGenerator, GetMorganFeatureAtomInvGen, + GetTopologicalTorsionGenerator, + GetAtomPairGenerator, + GetRDKitFPGenerator) + import numpy as np import pandas as pd from scipy.sparse import lil_matrix @@ -243,6 +248,9 @@ def __init__(self, minLength:int = 1, maxLength:int = 30, fromAtoms = 0, ignoreA self.nBitsPerEntry = nBitsPerEntry self.useCounts = useCounts + raise DeprecationWarning("TopologicalTorsionFingerprintTransformer will be replace by TopologicalTorsionFingerprintGeneratorTransformer, due to changes in RDKit!") + + def _mol2fp(self, mol): if self.useCounts: return rdMolDescriptors.GetHashedAtomPairFingerprint(mol, nBits=int(self.nBits), @@ -281,6 +289,8 @@ def __init__(self, targetSize:int = 4, fromAtoms = 0, ignoreAtoms = 0, atomInvar self.nBitsPerEntry = nBitsPerEntry self.nBits = nBits self.useCounts = useCounts + raise DeprecationWarning("TopologicalTorsionFingerprintTransformer will be replace by TopologicalTorsionFingerprintGeneratorTransformer, due to changes in RDKit!") + def _mol2fp(self, mol): if self.useCounts: @@ -478,6 +488,8 @@ def __init__(self, nBits=2048, radius=2, useChirality=False, useBondTypes=True, self.useBondTypes = useBondTypes self.useFeatures = useFeatures self.useCounts = useCounts + raise DeprecationWarning("MorganFingerprintTransformer will be replace by MorganGeneratorTransformer, due to changes in RDKit!") + def 
_mol2fp(self, mol): if self.useCounts: @@ -541,3 +553,490 @@ def parallel_helper(args): transformer = getattr(fingerprints, classname)(**parameters) return transformer._transform(X_mols) + +class FpsGeneratorTransformer(FpsTransformer): + + + def _fp2array(self, fp): + raise DeprecationWarning("Generators can directly return fingerprints") + + def _mol2fp(self, mol): + raise DeprecationWarning("use _mol2array") + + def __getstate__(self): + # Get the state of the parent class + state = super().__getstate__() + # Remove the unpicklable property from the state + state.pop("_fpgen", None) # fpgen is not picklable + return state + + def __setstate__(self, state): + # Restore the state of the parent class + super().__setstate__(state) + # Re-create the unpicklable property + self._generate_fp_generator() + + @abstractmethod + def _generate_fp_generator(self,*args, **kwargs): + raise NotImplementedError("_generate_fp_generator not implemented") + + @abstractmethod + def _transform_mol(self, mol) -> np.array: + """Generate numpy array descriptor from mol + + MUST BE OVERWRITTEN + """ + raise NotImplementedError("_transform_mol not implemented") + + +class MorganFPGeneratorTransformer(FpsGeneratorTransformer): + def __init__(self, nBits=2048, radius=2, useChirality=False, + useBondTypes=True, useFeatures=False, useCounts=False, + parallel: Union[bool, int] = False,): + """Transform RDKit mols into Count or bit-based hashed MorganFingerprints + + Parameters + ---------- + nBits : int, optional + Size of the hashed fingerprint, by default 2048 + radius : int, optional + Radius of the fingerprint, by default 2 + useChirality : bool, optional + Include chirality in calculation of the fingerprint keys, by default False + useBondTypes : bool, optional + Include bondtypes in calculation of the fingerprint keys, by default True + useFeatures : bool, optional + use chemical features, rather than atom-type in calculation of the fingerprint keys, by default False + useCounts : bool, 
optional + If toggled will create the count and not bit-based fingerprint, by default False + """ + super().__init__(parallel = parallel) + self._useFeatures = useFeatures + self._useCounts = useCounts + self._useBondTypes = useBondTypes + self._generate_fp_generator(useFeatures=useFeatures, radius=radius, nBits=nBits, + useChirality=useChirality, useBondTypes=useBondTypes) + + + def _generate_fp_generator(self, useFeatures:bool, radius:int, nBits:int, + useChirality:bool, useBondTypes:bool): + + if useFeatures: + atomInvariantsGenerator = GetMorganFeatureAtomInvGen() + else: + atomInvariantsGenerator = None + + self._fpgen = GetMorganGenerator(radius=radius, + fpSize=nBits, + includeChirality=useChirality, + useBondTypes=useBondTypes, + atomInvariantsGenerator=atomInvariantsGenerator, + ) + + @property + def radius(self): + return self._fpgen.GetOptions().radius + + @radius.setter + def radius(self, value:int): + self._fpgen.GetOptions().radius = value + + @property + def nBits(self): + return self._fpgen.GetOptions().fpSize + + @nBits.setter + def nBits(self, value:int): + self._fpgen.GetOptions().fpSize = value + + @property + def useChirality(self): + return self._fpgen.GetOptions().includeChirality + + @useChirality.setter + def useChirality(self, value:bool): + self._fpgen.GetOptions().includeChirality = value + + @property + def useFeatures(self): + return self._useFeatures + + @useFeatures.setter + def useFeatures(self, value:bool): + self._useFeatures = value + self._generate_fp_generator(useFeatures=self.useFeatures, radius=self.radius, nBits=self.nBits, + useChirality=self.useChirality, useBondTypes=self.useBondTypes) + + @property + def useBondTypes(self): + return self._useBondTypes + + @useBondTypes.setter + def useBondTypes(self, value:bool): + self._useBondTypes = value + self._generate_fp_generator(useFeatures=self.useFeatures, radius=self.radius, nBits=self.nBits, + useChirality=self.useChirality, useBondTypes=self.useBondTypes) + + @property + 
def useCounts(self): + return self._useCounts + + @useCounts.setter + def useCounts(self, value:bool): + self._useCounts = value + + def _transform_mol(self, mol) -> np.array: + if self.useCounts: + return self._fpgen.GetCountFingerprintAsNumPy(mol) + else: + return self._fpgen.GetFingerprintAsNumPy(mol) + + +class TopologicalTorsionFPGeneatorTransformer(FpsGeneratorTransformer): + def __init__(self, targetSize:int = 4, fromAtoms = None, ignoreAtoms = None, atomInvariants = None, confId=-1, + includeChirality:bool = False, nBitsPerEntry:int = 4, nBits=2048, + useCounts:bool=False, parallel: Union[bool, int] = False): + + super().__init__(parallel=parallel) + self._fromAtoms = fromAtoms + self._ignoreAtoms = ignoreAtoms + self._atomInvariants = atomInvariants + self._nBitsPerEntry = nBitsPerEntry + self._confId = confId + self._useCounts = useCounts + self._targetSize = targetSize + + self._generate_fp_generator(targetSize=targetSize, includeChirality=includeChirality, + nBits=nBits) + + @property + def useCounts(self): + return self._useCounts + + @useCounts.setter + def useCounts(self, value:bool): + self._useCounts = value + + @property + def confId(self): + return self._confId + + @confId.setter + def confId(self, value: int): + self._confId = value + + @property + def fromAtoms(self): + return self._fromAtoms + + @fromAtoms.setter + def fromAtoms(self, value: int): + self._fromAtoms = value + + @property + def ignoreAtoms(self): + return self._ignoreAtoms + + @ignoreAtoms.setter + def ignoreAtoms(self, value: int): + self._ignoreAtoms = value + + @property + def atomInvariants(self): + return self._atomInvariants + + @atomInvariants.setter + def atomInvariants(self, value: int): + self._atomInvariants = value + + @property + def nBits(self): + return self._fpgen.GetOptions().fpSize + + @nBits.setter + def nBits(self, value: int): + self._fpgen.GetOptions().fpSize = value + + @property + def nBitsPerEntry(self): + return self._nBitsPerEntry + + 
@nBitsPerEntry.setter + def nBitsPerEntry(self, value: int): + self._nBitsPerEntry = value + + @property + def includeChirality(self): + return self._fpgen.GetOptions().includeChirality + + @includeChirality.setter + def includeChirality(self, value:int): + self._fpgen.GetOptions().includeChirality = value + + @property + def targetSize(self): + return self._targetSize + + @targetSize.setter + def targetSize(self, value:int): + self._targetSize = value + self._generate_fp_generator(targetSize=value, + includeChirality=self.includeChirality, + nBits=self.nBits) + + def _generate_fp_generator(self, targetSize: int, includeChirality: bool, nBits: int): + self._fpgen = GetTopologicalTorsionGenerator(torsionAtomCount=targetSize, includeChirality=includeChirality, + fpSize=nBits) + + def _transform_mol(self, mol) -> np.array: + if self.useCounts: + return self._fpgen.GetCountFingerprintAsNumPy(mol, fromAtoms=self.fromAtoms, ignoreAtoms=self._ignoreAtoms, customAtomInvariants=self._atomInvariants) + else: + return self._fpgen.GetFingerprintAsNumPy(mol, fromAtoms=self.fromAtoms, ignoreAtoms=self._ignoreAtoms, customAtomInvariants=self._atomInvariants) + + +class AtomPairFPGeneratorTransformer(FpsGeneratorTransformer): + def __init__(self, minLength:int = 1, maxLength:int = 30, fromAtoms = None, ignoreAtoms = None, atomInvariants = None, + includeChirality:bool = False, use2D:bool = True, confId:int = -1, nBits=2048, nBitsPerEntry:int = 4, + useCounts:bool=False, parallel: Union[bool, int] = False,): + super().__init__(parallel = parallel) + self._useCounts= useCounts + self._confId = confId + self._fromAtoms = fromAtoms + self._ignoreAtoms = ignoreAtoms + self._atomInvariants = atomInvariants + self._minLength = minLength + self._maxLength = maxLength + + self._generate_fp_generator(minLength=minLength, maxLength=maxLength, + includeChirality=includeChirality, use2D=use2D, + nBits=nBits, nBitsPerEntry=nBitsPerEntry) + + @property + def useCounts(self): + return 
self._useCounts + + @useCounts.setter + def useCounts(self, value:bool): + self._useCounts = value + + @property + def confId(self): + return self._confId + + @confId.setter + def confId(self, value:int): + self._confId = value + + @property + def fromAtoms(self): + return self._fromAtoms + + @fromAtoms.setter + def fromAtoms(self, value:int): + self._fromAtoms = value + + @property + def ignoreAtoms(self): + return self._ignoreAtoms + + @ignoreAtoms.setter + def ignoreAtoms(self, value:int): + self._ignoreAtoms = value + + @property + def atomInvariants(self): + return self._atomInvariants + + @atomInvariants.setter + def atomInvariants(self, value:int): + self._atomInvariants = value + + @property + def minLength(self): + return self._minLength + + @minLength.setter + def minDistance(self, value: int): + self._minLength = value + self._generate_fp_generator(minLength=value, maxLength=self.maxLength, + includeChirality=self.includeChirality, use2D=self.use2D, + nBits=self.nBits, nBitsPerEntry=self.nBitsPerEntry) + + @property + def maxLength(self): + return self._maxLength + + @maxLength.setter + def maxLength(self, value: int): + self._maxLength = value + self._generate_fp_generator(minLength=self.minLength, maxLength=value, + includeChirality=self.includeChirality, use2D=self.use2D, + nBits=self.nBits, nBitsPerEntry=self.nBitsPerEntry) + + @property + def includeChirality(self): + return self._fpgen.GetOptions().includeChirality + + @includeChirality.setter + def includeChirality(self, value: bool): + self._fpgen.GetOptions().includeChirality = value + + @property + def use2D(self): + return self._fpgen.GetOptions().use2D + + @use2D.setter + def use2D(self, value: bool): + self._fpgen.GetOptions().use2D = value + + @property + def nBits(self): + return self._fpgen.GetOptions().fpSize + + @nBits.setter + def nBits(self, value: int): + self._fpgen.GetOptions().fpSize = value + + @property + def nBitsPerEntry(self): + return 
self._fpgen.GetOptions().numBitsPerFeature + + @nBitsPerEntry.setter + def nBitsPerEntry(self, value: int): + self._fpgen.GetOptions().numBitsPerFeature = value + + def _generate_fp_generator(self, minLength, maxLength, includeChirality, use2D, nBits, nBitsPerEntry): + self._fpgen = GetAtomPairGenerator(minDistance=minLength, maxDistance=maxLength, + includeChirality=includeChirality, + use2D=use2D, fpSize=nBits) + + def _transform_mol(self, mol) -> np.array: + if self.useCounts: + return self._fpgen.GetCountFingerprintAsNumPy(mol, fromAtoms=self.fromAtoms, ignoreAtoms=self._ignoreAtoms, customAtomInvariants=self._atomInvariants) + else: + return self._fpgen.GetFingerprintAsNumPy(mol, fromAtoms=self.fromAtoms, ignoreAtoms=self._ignoreAtoms, customAtomInvariants=self._atomInvariants) + + +class RDKitFPGeneratorTransformer(FpsGeneratorTransformer): + def __init__(self, minPath:int = 1, maxPath:int =7, useHs:bool = True, branchedPaths:bool = True, + useBondOrder:bool = True, countSimulation:bool = False, countBounds = None, + nBits:int = 2048, numBitsPerFeature:int = 2, + useCounts:bool = False, parallel: Union[bool, int] = False + ): + """Calculates the RDKit fingerprints + + Parameters + ---------- + minPath : int, optional + the minimum path length (in bonds) to be included, by default 1 + maxPath : int, optional + the maximum path length (in bonds) to be included, by default 7 + useHs : bool, optional + toggles inclusion of Hs in paths (if the molecule has explicit Hs), by default True + branchedPaths : bool, optional + toggles generation of branched subgraphs, not just linear paths, by default True + useBondOrder : bool, optional + toggles inclusion of bond orders in the path hashes, by default True + countSimulation : bool, optional + if set, use count simulation while generating the fingerprint, by default False + countBounds : _type_, optional + boundaries for count simulation, corresponding bit will be set if the count is higher than the number provided for 
that spot, by default None + nBits : int, optional + size of the generated fingerprint, does not affect the sparse versions, by default 2048 + numBitsPerFeature : int, optional + the number of bits set per path/subgraph found, by default 2 + """ + super().__init__(parallel = parallel) + self._useCounts = useCounts + self._countBounds = countBounds + self._generate_fp_generator( minPath=minPath, maxPath=maxPath, useHs=useHs, + branchedPaths=branchedPaths,useBondOrder=useBondOrder, + countSimulation=countSimulation, fpSize=nBits, + countBounds=countBounds, numBitsPerFeature=numBitsPerFeature) + + + @property + def nBits(self): + return self._fpgen.GetOptions().fpSize + @nBits.setter + def nBits(self, value: int): + self._fpgen.GetOptions().fpSize = value + @property + def minPath(self): + return self._fpgen.GetOptions().minPath + @minPath.setter + def minPath(self, value:int): + self._fpgen.GetOptions().minPath = value + @property + def maxPath(self): + return self._fpgen.GetOptions().maxPath + @maxPath.setter + def maxPath(self, value:int): + self._fpgen.GetOptions().maxPath = value + @property + def useHs(self): + return self._fpgen.GetOptions().useHs + @useHs.setter + def useHs(self, value:bool): + self._fpgen.GetOptions().useHs = value + @property + def branchedPaths(self): + return self._fpgen.GetOptions().branchedPaths + @branchedPaths.setter + def branchedPaths(self, value:int): + self._fpgen.GetOptions().branchedPaths = value + @property + def useBondOrder(self): + return self._fpgen.GetOptions().useBondOrder + @useBondOrder.setter + def useBondOrder(self, value:int): + self._fpgen.GetOptions().useBondOrder = value + @property + def numBitsPerFeature(self): + return self._fpgen.GetOptions().numBitsPerFeature + @numBitsPerFeature.setter + def numBitsPerFeature(self, value:int): + self._fpgen.GetOptions().numBitsPerFeature = value + @property + def countBounds(self): + return self._countBounds + @countBounds.setter + def countBounds(self, value:int): + 
self._countBounds = value + self._generate_fp_generator(minPath=self.minPath, maxPath=self.maxPath, useHs=self.useHs, + branchedPaths=self.branchedPaths,useBondOrder=self.useBondOrder, + countSimulation=self.countSimulation, fpSize=self.nBits, + countBounds=value, numBitsPerFeature=self.numBitsPerFeature) + + @property + def countSimulation(self): + return self._countBounds + @countSimulation.setter + def countSimulation(self, value: bool): + self._countSimulation=value + self._generate_fp_generator(minPath=self.minPath, maxPath=self.maxPath, useHs=self.useHs, + branchedPaths=self.branchedPaths,useBondOrder=self.useBondOrder, + countSimulation=value, fpSize=self.nBits, + countBounds=self.countBounds, numBitsPerFeature=self.numBitsPerFeature) + + @property + def useCounts(self): + return self._useCounts + @useCounts.setter + def useCounts(self, value:bool): + self._useCounts = value + + def _transform_mol(self, mol) -> np.array: + if self.useCounts: + return self._fpgen.GetCountFingerprintAsNumPy(mol) + else: + return self._fpgen.GetFingerprintAsNumPy(mol) + + def _generate_fp_generator(self, minPath, maxPath, useHs, branchedPaths, + useBondOrder, countSimulation, fpSize, countBounds, + numBitsPerFeature): + self._fpgen = GetRDKitFPGenerator(minPath=minPath, maxPath=maxPath, useHs=useHs, + branchedPaths=branchedPaths,useBondOrder=useBondOrder, + countSimulation=countSimulation, fpSize=fpSize, + countBounds=countBounds, numBitsPerFeature=numBitsPerFeature) From 8be105ad1c62247c94d7cd8c09bd92ed572c4c71 Mon Sep 17 00:00:00 2001 From: riesben Date: Sat, 14 Sep 2024 09:51:13 +0200 Subject: [PATCH 02/24] Deprecations warnings in transformers: raise->prints --- scikit_mol/fingerprints.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/scikit_mol/fingerprints.py b/scikit_mol/fingerprints.py index ef33aba..4c63f8f 100644 --- a/scikit_mol/fingerprints.py +++ b/scikit_mol/fingerprints.py @@ -248,7 +248,8 @@ def __init__(self, minLength:int = 1, 
maxLength:int = 30, fromAtoms = 0, ignoreA self.nBitsPerEntry = nBitsPerEntry self.useCounts = useCounts - raise DeprecationWarning("TopologicalTorsionFingerprintTransformer will be replace by TopologicalTorsionFingerprintGeneratorTransformer, due to changes in RDKit!") + print("AtomPairFingerprintTransformer will be replace by AtomPairFPGeneratorTransformer, due to changes in RDKit!") + #raise DeprecationWarning("AtomPairFingerprintTransformer will be replace by AtomPairFPGeneratorTransformer, due to changes in RDKit!") def _mol2fp(self, mol): @@ -289,7 +290,8 @@ def __init__(self, targetSize:int = 4, fromAtoms = 0, ignoreAtoms = 0, atomInvar self.nBitsPerEntry = nBitsPerEntry self.nBits = nBits self.useCounts = useCounts - raise DeprecationWarning("TopologicalTorsionFingerprintTransformer will be replace by TopologicalTorsionFingerprintGeneratorTransformer, due to changes in RDKit!") + print("TopologicalTorsionFingerprintTransformer will be replace by TopologicalTorsionFPGeneatorTransformer, due to changes in RDKit!") + #raise DeprecationWarning("AtomPairFingerprintTransformer will be replace by AtomPairFPGeneratorTransformer, due to changes in RDKit!") def _mol2fp(self, mol): @@ -488,7 +490,9 @@ def __init__(self, nBits=2048, radius=2, useChirality=False, useBondTypes=True, self.useBondTypes = useBondTypes self.useFeatures = useFeatures self.useCounts = useCounts - raise DeprecationWarning("MorganFingerprintTransformer will be replace by MorganGeneratorTransformer, due to changes in RDKit!") + + print("MorganFingerprintTransformer will be replace by MorganGeneratorTransformer, due to changes in RDKit!") + #raise DeprecationWarning("MorganFingerprintTransformer will be replace by MorganFPGeneratorTransformer, due to changes in RDKit!") def _mol2fp(self, mol): @@ -574,7 +578,7 @@ def __setstate__(self, state): # Restore the state of the parent class super().__setstate__(state) # Re-create the unpicklable property - self._generate_fp_generator() + 
self._generate_fp_generator(**state) @abstractmethod def _generate_fp_generator(self,*args, **kwargs): @@ -614,6 +618,7 @@ def __init__(self, nBits=2048, radius=2, useChirality=False, self._useFeatures = useFeatures self._useCounts = useCounts self._useBondTypes = useBondTypes + self._generate_fp_generator(useFeatures=useFeatures, radius=radius, nBits=nBits, useChirality=useChirality, useBondTypes=useBondTypes) From 681a493e74fff8c374cbb984da5a55b1712aff74 Mon Sep 17 00:00:00 2001 From: riesben Date: Sat, 14 Sep 2024 12:40:07 +0200 Subject: [PATCH 03/24] minor class property fixes, most test wun now. Need to look into cloning and pickling. --- scikit_mol/fingerprints.py | 45 +++--- tests/test_fptransformersgenerator.py | 188 ++++++++++++++++++++++++++ 2 files changed, 209 insertions(+), 24 deletions(-) create mode 100644 tests/test_fptransformersgenerator.py diff --git a/scikit_mol/fingerprints.py b/scikit_mol/fingerprints.py index 4c63f8f..69ca8fd 100644 --- a/scikit_mol/fingerprints.py +++ b/scikit_mol/fingerprints.py @@ -2,6 +2,8 @@ from multiprocessing import Pool, get_context import multiprocessing import re +import inspect +from typing import Callable from typing import Union from rdkit import Chem from rdkit import DataStructs @@ -571,6 +573,8 @@ def __getstate__(self): # Get the state of the parent class state = super().__getstate__() # Remove the unpicklable property from the state + props = {k:v for k,v in inspect.getmembers(self) if not isinstance(v, Callable) and not k.startswith("_")} + state.update(props) state.pop("_fpgen", None) # fpgen is not picklable return state @@ -578,7 +582,8 @@ def __setstate__(self, state): # Restore the state of the parent class super().__setstate__(state) # Re-create the unpicklable property - self._generate_fp_generator(**state) + generatort_keys = inspect.signature(self._generate_fp_generator).parameters.keys() + self._generate_fp_generator(**{k:state["_"+k] if "_"+k in state else state[k] for k in generatort_keys}) 
@abstractmethod def _generate_fp_generator(self,*args, **kwargs): @@ -592,6 +597,14 @@ def _transform_mol(self, mol) -> np.array: """ raise NotImplementedError("_transform_mol not implemented") + @property + def fpSize(self): + return self.nBits + + #Scikit-Learn expects to be able to set fpSize directly on object via .set_params(), so this updates nBits used by the abstract class + @fpSize.setter + def fpSize(self, fpSize): + self.nBits = fpSize class MorganFPGeneratorTransformer(FpsGeneratorTransformer): def __init__(self, nBits=2048, radius=2, useChirality=False, @@ -699,18 +712,16 @@ def _transform_mol(self, mol) -> np.array: class TopologicalTorsionFPGeneatorTransformer(FpsGeneratorTransformer): def __init__(self, targetSize:int = 4, fromAtoms = None, ignoreAtoms = None, atomInvariants = None, confId=-1, - includeChirality:bool = False, nBitsPerEntry:int = 4, nBits=2048, + includeChirality:bool = False, nBits=2048, useCounts:bool=False, parallel: Union[bool, int] = False): super().__init__(parallel=parallel) self._fromAtoms = fromAtoms self._ignoreAtoms = ignoreAtoms self._atomInvariants = atomInvariants - self._nBitsPerEntry = nBitsPerEntry self._confId = confId self._useCounts = useCounts self._targetSize = targetSize - self._generate_fp_generator(targetSize=targetSize, includeChirality=includeChirality, nBits=nBits) @@ -762,14 +773,6 @@ def nBits(self): def nBits(self, value: int): self._fpgen.GetOptions().fpSize = value - @property - def nBitsPerEntry(self): - return self._nBitsPerEntry - - @nBitsPerEntry.setter - def nBitsPerEntry(self, value: int): - self._nBitsPerEntry = value - @property def includeChirality(self): return self._fpgen.GetOptions().includeChirality @@ -802,7 +805,7 @@ def _transform_mol(self, mol) -> np.array: class AtomPairFPGeneratorTransformer(FpsGeneratorTransformer): def __init__(self, minLength:int = 1, maxLength:int = 30, fromAtoms = None, ignoreAtoms = None, atomInvariants = None, - includeChirality:bool = False, use2D:bool = 
True, confId:int = -1, nBits=2048, nBitsPerEntry:int = 4, + includeChirality:bool = False, use2D:bool = True, confId:int = -1, nBits=2048, useCounts:bool=False, parallel: Union[bool, int] = False,): super().__init__(parallel = parallel) self._useCounts= useCounts @@ -815,7 +818,7 @@ def __init__(self, minLength:int = 1, maxLength:int = 30, fromAtoms = None, igno self._generate_fp_generator(minLength=minLength, maxLength=maxLength, includeChirality=includeChirality, use2D=use2D, - nBits=nBits, nBitsPerEntry=nBitsPerEntry) + nBits=nBits) @property def useCounts(self): @@ -862,11 +865,11 @@ def minLength(self): return self._minLength @minLength.setter - def minDistance(self, value: int): + def minLength(self, value: int): self._minLength = value self._generate_fp_generator(minLength=value, maxLength=self.maxLength, includeChirality=self.includeChirality, use2D=self.use2D, - nBits=self.nBits, nBitsPerEntry=self.nBitsPerEntry) + nBits=self.nBits) @property def maxLength(self): @@ -877,7 +880,7 @@ def maxLength(self, value: int): self._maxLength = value self._generate_fp_generator(minLength=self.minLength, maxLength=value, includeChirality=self.includeChirality, use2D=self.use2D, - nBits=self.nBits, nBitsPerEntry=self.nBitsPerEntry) + nBits=self.nBits) @property def includeChirality(self): @@ -907,11 +910,7 @@ def nBits(self, value: int): def nBitsPerEntry(self): return self._fpgen.GetOptions().numBitsPerFeature - @nBitsPerEntry.setter - def nBitsPerEntry(self, value: int): - self._fpgen.GetOptions().numBitsPerFeature = value - - def _generate_fp_generator(self, minLength, maxLength, includeChirality, use2D, nBits, nBitsPerEntry): + def _generate_fp_generator(self, minLength, maxLength, includeChirality, use2D, nBits): self._fpgen = GetAtomPairGenerator(minDistance=minLength, maxDistance=maxLength, includeChirality=includeChirality, use2D=use2D, fpSize=nBits) @@ -1013,7 +1012,6 @@ def countBounds(self, value:int): 
branchedPaths=self.branchedPaths,useBondOrder=self.useBondOrder, countSimulation=self.countSimulation, fpSize=self.nBits, countBounds=value, numBitsPerFeature=self.numBitsPerFeature) - @property def countSimulation(self): return self._countBounds @@ -1024,7 +1022,6 @@ def countSimulation(self, value: bool): branchedPaths=self.branchedPaths,useBondOrder=self.useBondOrder, countSimulation=value, fpSize=self.nBits, countBounds=self.countBounds, numBitsPerFeature=self.numBitsPerFeature) - @property def useCounts(self): return self._useCounts diff --git a/tests/test_fptransformersgenerator.py b/tests/test_fptransformersgenerator.py new file mode 100644 index 0000000..f11ea95 --- /dev/null +++ b/tests/test_fptransformersgenerator.py @@ -0,0 +1,188 @@ +import pickle +import tempfile +import pytest +import numpy as np +from fixtures import mols_list, smiles_list, mols_container, smiles_container, fingerprint, chiral_smiles_list, chiral_mols_list +from sklearn import clone + +from scikit_mol.fingerprints import (MorganFPGeneratorTransformer, + RDKitFPGeneratorTransformer, + AtomPairFPGeneratorTransformer, + TopologicalTorsionFPGeneatorTransformer, + ) + +test_transformers = [MorganFPGeneratorTransformer, RDKitFPGeneratorTransformer, + AtomPairFPGeneratorTransformer, TopologicalTorsionFPGeneatorTransformer] + + +@pytest.mark.parametrize("transformer_class", test_transformers) +def test_fpstransformer_fp2array(transformer_class, fingerprint): + transformer = transformer_class() + + with pytest.raises(DeprecationWarning, match='Generators can directly return fingerprints'): + fp = transformer._fp2array(fingerprint) + + +@pytest.mark.parametrize("transformer_class", test_transformers) +def test_fpstransformer_transform_mol(transformer_class, mols_list): + transformer = transformer_class() + + fp = transformer._transform_mol(mols_list[0]) + #See that fp is the correct type, shape and bit count + assert(type(fp) == type(np.array([0]))) + assert(fp.shape == (2048,)) + + if 
isinstance(transformer, RDKitFPGeneratorTransformer): + assert(fp.sum() == 104) + elif isinstance(transformer, AtomPairFPGeneratorTransformer): + assert (fp.sum() == 32) + elif isinstance(transformer, TopologicalTorsionFPGeneatorTransformer): + assert (fp.sum() == 12) + elif isinstance(transformer, MorganFPGeneratorTransformer): + assert (fp.sum() == 14) + else: + raise NotImplementedError("missing Assert") + +@pytest.mark.parametrize("transformer_class", test_transformers) +def test_clonability(transformer_class): + transformer = transformer_class() + + params = transformer.get_params() + t2 = clone(transformer) + params_2 = t2.get_params() + #Parameters of cloned transformers should be the same + assert all([ params[key] == params_2[key] for key in params.keys()]) + #Cloned transformers should not be the same object + assert t2 != transformer + +@pytest.mark.parametrize("transformer_class", test_transformers) +def test_set_params(transformer_class): + transformer = transformer_class() + params = transformer.get_params() + #change extracted dictionary + params['nBits'] = 4242 + #change params in transformer + transformer.set_params(nBits = 4242) + # get parameters as dictionary and assert that it is the same + params_2 = transformer.get_params() + assert all([ params[key] == params_2[key] for key in params.keys()]) + +@pytest.mark.parametrize("transformer_class", test_transformers) +def test_transform(mols_container, transformer_class): + transformer = transformer_class() + #Test the different transformers + params = transformer.get_params() + fps = transformer.transform(mols_container) + #Assert that the same length of input and output + assert len(fps) == len(mols_container) + + fpsize = params['nBits'] + + assert len(fps[0]) == fpsize + +@pytest.mark.parametrize("transformer_class", test_transformers) +def test_transform_parallel(mols_container, transformer_class): + transformer = transformer_class() + #Test the different transformers + 
transformer.set_params(parallel=True) + params = transformer.get_params() + fps = transformer.transform(mols_container) + #Assert that the same length of input and output + assert len(fps) == len(mols_container) + + fpsize = params['nBits'] + assert len(fps[0]) == fpsize + + +@pytest.mark.parametrize("transformer_class", test_transformers) +def test_picklable(transformer_class): + #Test the different transformers + transformer = transformer_class() + p = transformer.get_params() + + with tempfile.NamedTemporaryFile() as f: + pickle.dump(transformer, f) + f.seek(0) + t2 = pickle.load(f) + print(p) + print(vars(transformer)) + print(vars(t2)) + assert(transformer.get_params() == t2.get_params()) + + +@pytest.mark.parametrize("transfomer", test_transformers) +def assert_transformer_set_params(transfomer, new_params, mols_list): + default_params = transfomer().get_params() + + for key in new_params.keys(): + tr = transfomer() + params = tr.get_params() + params[key] = new_params[key] + + fps_default = tr.transform(mols_list) + + tr.set_params(**params) + new_tr = transfomer(**params) + fps_reset_params = tr.transform(mols_list) + fps_init_new_params = new_tr.transform(mols_list) + + # Now fp_default should not be the same as fp_reset_params + + assert ~np.all([np.array_equal(fp_default, fp_reset_params) for fp_default, fp_reset_params in zip(fps_default, fps_reset_params)]), f"Assertation error, FP appears the same, although the {key} should be changed from {default_params[key]} to {params[key]}" + # fp_reset_params and fp_init_new_params should however be the same + assert np.all([np.array_equal(fp_init_new_params, fp_reset_params) for fp_init_new_params, fp_reset_params in zip(fps_init_new_params, fps_reset_params)]) , f"Assertation error, FP appears to be different, although the {key} should be changed back as well as initialized to {params[key]}" + + +def test_morgan_set_params(chiral_mols_list): + new_params = {'nBits': 1024, + 'radius': 1, + 'useBondTypes': 
False,# TODO, why doesn't this change the FP? + 'useChirality': True, + 'useCounts': True, + 'useFeatures': True} + + assert_transformer_set_params(MorganFPGeneratorTransformer, new_params, chiral_mols_list) + + +def test_atompairs_set_params(chiral_mols_list): + new_params = { + #'atomInvariants': 1, + #'confId': -1, + #'fromAtoms': 1, + #'ignoreAtoms': 0, + 'includeChirality': True, + 'maxLength': 3, + 'minLength': 3, + 'nBits': 1024, + #'nBitsPerEntry': 3, #Todo: not setable with the generators? + #'use2D': True, #TODO, understand why this can't be set different + 'useCounts': True} + + assert_transformer_set_params(AtomPairFPGeneratorTransformer, new_params, chiral_mols_list) + + +def test_topologicaltorsion_set_params(chiral_mols_list): + new_params = {#'atomInvariants': 0, + #'fromAtoms': 0, + #'ignoreAtoms': 0, + #'includeChirality': True, #TODO, figure out why this setting seems to give same FP wheter toggled or not + 'nBits': 1024, + #'nBitsPerEntry': 3, #Todo: not setable with the generators? + 'targetSize': 5, + 'useCounts': True} + + assert_transformer_set_params(TopologicalTorsionFPGeneatorTransformer, new_params, chiral_mols_list) + +def test_RDKitFPTransformer(chiral_mols_list): + new_params = {#'atomInvariantsGenerator': None, + #'branchedPaths': False, + #'countBounds': 0, #TODO: What does this do? + 'countSimulation': True, + 'nBits': 1024, + 'maxPath': 3, + 'minPath': 2, + 'numBitsPerFeature': 3, + 'useBondOrder': False, #TODO, why doesn't this change the FP? + #'useHs': False, #TODO, why doesn't this change the FP? 
+ } + assert_transformer_set_params(RDKitFPGeneratorTransformer, new_params, chiral_mols_list) From c812214103036b0596f82d15961894e197bfb28d Mon Sep 17 00:00:00 2001 From: riesben Date: Mon, 16 Sep 2024 23:14:01 +0200 Subject: [PATCH 04/24] fixes for bugs --- scikit_mol/fingerprints.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scikit_mol/fingerprints.py b/scikit_mol/fingerprints.py index 69ca8fd..66d6c51 100644 --- a/scikit_mol/fingerprints.py +++ b/scikit_mol/fingerprints.py @@ -572,9 +572,8 @@ def _mol2fp(self, mol): def __getstate__(self): # Get the state of the parent class state = super().__getstate__() + state.update(self.get_params()) # Remove the unpicklable property from the state - props = {k:v for k,v in inspect.getmembers(self) if not isinstance(v, Callable) and not k.startswith("_")} - state.update(props) state.pop("_fpgen", None) # fpgen is not picklable return state @@ -583,7 +582,8 @@ def __setstate__(self, state): super().__setstate__(state) # Re-create the unpicklable property generatort_keys = inspect.signature(self._generate_fp_generator).parameters.keys() - self._generate_fp_generator(**{k:state["_"+k] if "_"+k in state else state[k] for k in generatort_keys}) + params = {k:state["_"+k] if "_"+k in state else state[k] for k in generatort_keys} + self._generate_fp_generator(**params) @abstractmethod def _generate_fp_generator(self,*args, **kwargs): From 523b1905bcf1de2a7e9d5f8c59dc1482607fb338 Mon Sep 17 00:00:00 2001 From: Esben Jannik Bjerrum Date: Sat, 26 Oct 2024 15:56:13 +0200 Subject: [PATCH 05/24] updated readmes --- CONTRIBUTION.md | 8 +++++--- README.md | 7 ++++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTION.md b/CONTRIBUTION.md index 3e39632..1b9aa61 100644 --- a/CONTRIBUTION.md +++ b/CONTRIBUTION.md @@ -5,7 +5,7 @@ Thanks for your interest in contributing to the project. 
Please read on in the s ## Slack channel We have a slack channel for communication, ask for an invite: esbenbjerrum+scikit_mol@gmail.com -It's not really active and Slack wan't to be paid now. Maybe we can use Discord instead. +It's not really active and Slack wants to be paid now. Maybe we can use Discord instead, as Slack is now deleting old threads. ## Installation @@ -22,12 +22,13 @@ The projects transformers subclasses the BaseEstimator and Transformer mixin cla - The arguments accepted by **init** should all be keyword arguments with a default value. - Every keyword argument accepted by **init** should correspond to an attribute on the instance. -- - There should be no logic, not even input validation, and the parameters should not be changed. +- - There should be no logic, not even input validation, and the parameters should not be changed inside the **init** function. Scikit-learn classes depends on this in order to for e.g. the .get_params(), .set_params(), cloning abilities and representation rendering to work. +- With the new error handling, falsy objects need to return masked arrays or arrays with np.nan (for float dtype) ### Tips -- We have observed that some external tools used "exotic" types such at np.int64 when doing hyperparameter tuning. It is thus necessary to cast to standard types before making calls to rdkit functions. This behaviour is tested in the test_parameter_types test +- We have observed that some external tools used "exotic" types such as np.int64 when doing hyperparameter tuning. It is thus necessary to do defensive programming and cast parameters to standard types before making calls to rdkit functions. This behaviour is tested in the test_parameter_types test - @property getters and setters can be used if additional logic are needed when setting the attributes from the keywords while at the same time adhering to the sklearn requisites.
@@ -48,6 +49,7 @@ parameters and output of methods should preferably be using typehints ## Testing New transformer classes should be added to the pytest tests in the tests directory. A lot of tests are made general, and tests aspects of the transformers that are needed for sklearn compliance or other features. The transformer is then added to a fixture and can be added to the lists of transformer objects that are run by these test. Specific tests may also be necessary to set up. As exampe the assert_transformer_set_params needs a list of non-default parameters in order to set the set_params functionality of the object. +Scikit-Learn has a check_estimator that we should strive to get to work; some classes of scikit-mol currently do not pass all tests. ## Notebooks diff --git a/README.md b/README.md index d59717d..7c6255f 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,12 @@ There are a collection of notebooks in the notebooks directory which demonstrate We also put a software note on ChemRxiv. [https://doi.org/10.26434/chemrxiv-2023-fzqwd](https://doi.org/10.26434/chemrxiv-2023-fzqwd) -## Contributing +## Roadmap and Contributing + +_Help wanted!_ Are you a PhD student who wants a "side-quest" to procrastinate your thesis writing, or are you simply interested in computational chemistry, cheminformatics, or simply have an interest in QSAR modelling, Python programming and open-source software? Do you want to learn more about machine learning with Scikit-Learn? Or do you use scikit-mol for your current work and would like to pay a little back to the project and see it improved as well? +With a little bit of help, this project can be improved much faster! Reach out to me (Esben) for a discussion about how we can proceed. + +Currently we are working on fixing some deprecation warnings; it's not the most exciting work, but it's important to maintain a little.
Later on we need to go over the scikit-learn compatibility and update to some of their newer features on their estimator classes. We're also brewing on some feature enhancements and tests, such as new fingerprints and a more versatile standardizer. There are more information about how to contribute to the project in [CONTRIBUTION.md](https://github.com/EBjerrum/scikit-mol/CONTRIBUTION.md) From fdd8624ece60a5fd0a245eb67bed042b17e91ca6 Mon Sep 17 00:00:00 2001 From: riesben Date: Thu, 14 Nov 2024 22:32:53 +0100 Subject: [PATCH 06/24] Remodelling transformers: - nBits->fpSize - remove properties / overwrite setattr - adapt tests. --- scikit_mol/fingerprints.py | 550 +++++++------------------- tests/test_fptransformers.py | 55 +-- tests/test_fptransformersgenerator.py | 16 +- tests/test_safeinferencemode.py | 4 +- tests/test_transformers.py | 7 +- 5 files changed, 173 insertions(+), 459 deletions(-) diff --git a/scikit_mol/fingerprints.py b/scikit_mol/fingerprints.py index ace9795..a96e65b 100644 --- a/scikit_mol/fingerprints.py +++ b/scikit_mol/fingerprints.py @@ -56,7 +56,7 @@ def _get_column_prefix(self) -> str: return "fp" def _get_n_digits_column_suffix(self) -> int: - return len(str(self.nBits)) + return len(str(self.fpSize)) def get_display_feature_names_out(self, input_features=None): """Get feature names for display purposes @@ -68,7 +68,7 @@ def get_display_feature_names_out(self, input_features=None): prefix = self._get_column_prefix() n_digits = self._get_n_digits_column_suffix() return np.array( - [f"{prefix}_{str(i).zfill(n_digits)}" for i in range(1, self.nBits + 1)] + [f"{prefix}_{str(i).zfill(n_digits)}" for i in range(1, self.fpSize + 1)] ) def get_feature_names_out(self, input_features=None): @@ -78,7 +78,7 @@ def get_feature_names_out(self, input_features=None): to get the column names of the transformed dataframe. 
""" prefix = self._get_column_prefix() - return np.array([f"{prefix}_{i}" for i in range(1, self.nBits + 1)]) + return np.array([f"{prefix}_{i}" for i in range(1, self.fpSize + 1)]) @abstractmethod def _mol2fp(self, mol): @@ -90,11 +90,11 @@ def _mol2fp(self, mol): def _fp2array(self, fp): if fp: - arr = np.zeros((self.nBits,), dtype=self.dtype) + arr = np.zeros((self.fpSize,), dtype=self.dtype) DataStructs.ConvertToNumpyArray(fp, arr) return arr else: - return np.ma.masked_all((self.nBits,), dtype=self.dtype) + return np.ma.masked_all((self.fpSize,), dtype=self.dtype) def _transform_mol(self, mol): if not mol and self.safe_inference_mode: @@ -120,16 +120,17 @@ def _transform(self, X): if self.safe_inference_mode: # Use the new method with masked arrays if we're in safe inference mode arrays = [self._transform_mol(mol) for mol in X] + print(arrays) return np.ma.stack(arrays) else: # Use the original, faster method if we're not in safe inference mode - arr = np.zeros((len(X), self.nBits), dtype=self.dtype) + arr = np.zeros((len(X), self.fpSize), dtype=self.dtype) for i, mol in enumerate(X): arr[i, :] = self._transform_mol(mol) return arr def _transform_sparse(self, X): - arr = np.zeros((len(X), self.nBits), dtype=self.dtype) + arr = np.zeros((len(X), self.fpSize), dtype=self.dtype) for i, mol in enumerate(X): arr[i, :] = self._transform_mol(mol) @@ -189,6 +190,7 @@ def __init__( parallel: Union[bool, int] = False, safe_inference_mode: bool = False, dtype: np.dtype = np.int8, + fpSize=167, ): """MACCS keys fingerprinter calculates the 167 fixed MACCS keys @@ -196,19 +198,23 @@ def __init__( super().__init__( parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype ) - self.nBits = 167 + if fpSize != 167: + raise ValueError( + "fpSize can only be 167, matching the number of defined MACCS keys!" 
+ ) + self._fpSize = fpSize @property - def nBits(self): - return self._nBits + def fpSize(self): + return self._fpSize - @nBits.setter - def nBits(self, nBits): - if nBits != 167: + @fpSize.setter + def fpSize(self, fpSize): + if fpSize != 167: raise ValueError( - "nBits can only be 167, matching the number of defined MACCS keys!" + "fpSize can only be 167, matching the number of defined MACCS keys!" ) - self._nBits = nBits + self._fpSize = fpSize def _mol2fp(self, mol): return rdMolDescriptors.GetMACCSKeysFingerprint(mol) @@ -270,14 +276,6 @@ def __init__( self.numBitsPerFeature = numBitsPerFeature self.atomInvariantsGenerator = atomInvariantsGenerator - @property - def fpSize(self): - return self.nBits - - # Scikit-Learn expects to be able to set fpSize directly on object via .set_params(), so this updates nBits used by the abstract class - @fpSize.setter - def fpSize(self, fpSize): - self.nBits = fpSize def _mol2fp(self, mol): generator = rdFingerprintGenerator.GetRDKitFPGenerator( @@ -307,7 +305,7 @@ def __init__( includeChirality: bool = False, use2D: bool = True, confId: int = -1, - nBits=2048, + fpSize=2048, useCounts: bool = False, parallel: Union[bool, int] = False, safe_inference_mode: bool = False, @@ -324,7 +322,7 @@ def __init__( self.includeChirality = includeChirality self.use2D = use2D self.confId = confId - self.nBits = nBits + self.fpSize = fpSize self.nBitsPerEntry = nBitsPerEntry self.useCounts = useCounts @@ -336,7 +334,7 @@ def _mol2fp(self, mol): if self.useCounts: return rdMolDescriptors.GetHashedAtomPairFingerprint( mol, - nBits=int(self.nBits), + nBits=int(self.fpSize), minLength=int(self.minLength), maxLength=int(self.maxLength), fromAtoms=self.fromAtoms, @@ -349,7 +347,7 @@ def _mol2fp(self, mol): else: return rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect( mol, - nBits=int(self.nBits), + nBits=int(self.fpSize), minLength=int(self.minLength), maxLength=int(self.maxLength), fromAtoms=self.fromAtoms, @@ -371,7 +369,7 @@ def 
__init__( atomInvariants=0, includeChirality: bool = False, nBitsPerEntry: int = 4, - nBits=2048, + fpSize=2048, useCounts: bool = False, parallel: Union[bool, int] = False, safe_inference_mode: bool = False, @@ -386,7 +384,7 @@ def __init__( self.atomInvariants = atomInvariants self.includeChirality = includeChirality self.nBitsPerEntry = nBitsPerEntry - self.nBits = nBits + self.fpSize = fpSize self.useCounts = useCounts print("TopologicalTorsionFingerprintTransformer will be replace by TopologicalTorsionFPGeneatorTransformer, due to changes in RDKit!") #raise DeprecationWarning("AtomPairFingerprintTransformer will be replace by AtomPairFPGeneratorTransformer, due to changes in RDKit!") @@ -396,7 +394,7 @@ def _mol2fp(self, mol): if self.useCounts: return rdMolDescriptors.GetHashedTopologicalTorsionFingerprint( mol, - nBits=int(self.nBits), + nBits=int(self.fpSize), targetSize=int(self.targetSize), fromAtoms=self.fromAtoms, ignoreAtoms=self.ignoreAtoms, @@ -406,7 +404,7 @@ def _mol2fp(self, mol): else: return rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect( mol, - nBits=int(self.nBits), + nBits=int(self.fpSize), targetSize=int(self.targetSize), fromAtoms=self.fromAtoms, ignoreAtoms=self.ignoreAtoms, @@ -424,7 +422,7 @@ def __init__( isomeric: bool = False, kekulize: bool = False, min_radius: int = 1, - n_permutations: int = 2048, + fpSize: int = 2048, seed: int = 42, parallel: Union[bool, int] = False, safe_inference_mode: bool = False, @@ -440,7 +438,7 @@ def __init__( isomeric (bool, optional): Whether the isomeric SMILES to be considered. Defaults to False. kekulize (bool, optional): Whether or not to kekulize the extracted SMILES. Defaults to False. min_radius (int, optional): The minimum radius that is used to extract n-gram. Defaults to 1. - n_permutations (int, optional): The number of permutations used for hashing. Defaults to 0, + fpSize (int, optional): The number of permutations used for hashing. 
Defaults to 2048, this is effectively the length of the FP seed (int, optional): The value used to seed numpy.random. Defaults to 0. """ @@ -453,7 +451,7 @@ def __init__( self.kekulize = kekulize self.min_radius = min_radius # Set the .n_permutations and .seed without creating the encoder twice - self._n_permutations = n_permutations + self.fpSize = fpSize self._seed = seed # create the encoder instance self._recreate_encoder() @@ -482,7 +480,7 @@ def _fp2array(self, fp): def _recreate_encoder(self): self.mhfp_encoder = rdMHFPFingerprint.MHFPEncoder( - self._n_permutations, self._seed + self.fpSize, self._seed ) @property @@ -497,19 +495,14 @@ def seed(self, seed): @property def n_permutations(self): - return self._n_permutations + return self.fpSize @n_permutations.setter def n_permutations(self, n_permutations): - self._n_permutations = n_permutations + self.fpSize = n_permutations # each time the n_permutations parameter is modified refresh an instance of the encoder self._recreate_encoder() - @property - def nBits(self): - # to be compliant with the requirement of the base class - return self._n_permutations - class SECFingerprintTransformer(FpsTransformer): # https://jcheminf.biomedcentral.com/articles/10.1186/s13321-018-0321-8 @@ -520,7 +513,7 @@ def __init__( isomeric: bool = False, kekulize: bool = False, min_radius: int = 1, - length: int = 2048, + fpSize: int = 2048, n_permutations: int = 0, seed: int = 0, parallel: Union[bool, int] = False, @@ -535,7 +528,7 @@ def __init__( isomeric (bool, optional): Whether the isomeric SMILES to be considered. Defaults to False. kekulize (bool, optional): Whether or not to kekulize the extracted SMILES. Defaults to False. min_radius (int, optional): The minimum radius that is used to extract n-gram. Defaults to 1. - length (int, optional): The length of the folded fingerprint. Defaults to 2048. + fpSize (int, optional): The length of the folded fingerprint. Defaults to 2048. 
n_permutations (int, optional): The number of permutations used for hashing. Defaults to 0. seed (int, optional): The value used to seed numpy.random. Defaults to 0. """ @@ -547,7 +540,7 @@ def __init__( self.isomeric = isomeric self.kekulize = kekulize self.min_radius = min_radius - self.length = length + self.fpSize = fpSize # Set the .n_permutations and seed without creating the encoder twice self._n_permutations = n_permutations self._seed = seed @@ -604,15 +597,15 @@ def n_permutations(self, n_permutations): self._recreate_encoder() @property - def nBits(self): + def length(self): # to be compliant with the requirement of the base class - return self.length + return self.fpSize class MorganFingerprintTransformer(FpsTransformer): def __init__( self, - nBits=2048, + fpSize=2048, radius=2, useChirality=False, useBondTypes=True, @@ -626,7 +619,7 @@ def __init__( Parameters ---------- - nBits : int, optional + fpSize : int, optional Size of the hashed fingerprint, by default 2048 radius : int, optional Radius of the fingerprint, by default 2 @@ -642,7 +635,7 @@ def __init__( super().__init__( parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype ) - self.nBits = nBits + self.fpSize = fpSize self.radius = radius self.useChirality = useChirality self.useBondTypes = useBondTypes @@ -658,7 +651,7 @@ def _mol2fp(self, mol): return rdMolDescriptors.GetHashedMorganFingerprint( mol, int(self.radius), - nBits=int(self.nBits), + nBits=int(self.fpSize), useFeatures=bool(self.useFeatures), useChirality=bool(self.useChirality), useBondTypes=bool(self.useBondTypes), @@ -667,7 +660,7 @@ def _mol2fp(self, mol): return rdMolDescriptors.GetMorganFingerprintAsBitVect( mol, int(self.radius), - nBits=int(self.nBits), + nBits=int(self.fpSize), useFeatures=bool(self.useFeatures), useChirality=bool(self.useChirality), useBondTypes=bool(self.useBondTypes), @@ -678,7 +671,7 @@ class AvalonFingerprintTransformer(FpsTransformer): # Fingerprint from the Avalon toolkeit, 
https://doi.org/10.1021/ci050413p def __init__( self, - nBits: int = 512, + fpSize: int = 512, isQuery: bool = False, resetVect: bool = False, bitFlags: int = 15761407, @@ -691,7 +684,7 @@ def __init__( Parameters ---------- - nBits : int, optional + fpSize : int, optional Size of the fingerprint, by default 512 isQuery : bool, optional use the fingerprint for a query structure, by default False @@ -705,7 +698,7 @@ def __init__( super().__init__( parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype ) - self.nBits = nBits + self.fpSize = fpSize self.isQuery = isQuery self.resetVect = resetVect self.bitFlags = bitFlags @@ -715,14 +708,14 @@ def _mol2fp(self, mol): if self.useCounts: return pyAvalonTools.GetAvalonCountFP( mol, - nBits=int(self.nBits), + nBits=int(self.fpSize), isQuery=bool(self.isQuery), bitFlags=int(self.bitFlags), ) else: return pyAvalonTools.GetAvalonFP( mol, - nBits=int(self.nBits), + nBits=int(self.fpSize), isQuery=bool(self.isQuery), resetVect=bool(self.resetVect), bitFlags=int(self.bitFlags), @@ -740,7 +733,7 @@ def parallel_helper(args): return transformer._transform(X_mols) class FpsGeneratorTransformer(FpsTransformer): - + _regenerate_on_properties = () def _fp2array(self, fp): raise DeprecationWarning("Generators can directly return fingerprints") @@ -761,11 +754,19 @@ def __setstate__(self, state): super().__setstate__(state) # Re-create the unpicklable property generatort_keys = inspect.signature(self._generate_fp_generator).parameters.keys() - params = {k:state["_"+k] if "_"+k in state else state[k] for k in generatort_keys} - self._generate_fp_generator(**params) + params = [setattr(self, k, state["_"+k]) if "_"+k in state else setattr(self, k, state[k]) for k in generatort_keys] + self._generate_fp_generator() + + def __setattr__(self, name: str, value): + super().__setattr__(name, value) + if ( + not hasattr(self, "_initializing") + and name in self._regenerate_on_properties + ): + self._generate_fp_generator() 
@abstractmethod - def _generate_fp_generator(self,*args, **kwargs): + def _generate_fp_generator(self): raise NotImplementedError("_generate_fp_generator not implemented") @abstractmethod @@ -776,24 +777,18 @@ def _transform_mol(self, mol) -> np.array: """ raise NotImplementedError("_transform_mol not implemented") - @property - def fpSize(self): - return self.nBits - - #Scikit-Learn expects to be able to set fpSize directly on object via .set_params(), so this updates nBits used by the abstract class - @fpSize.setter - def fpSize(self, fpSize): - self.nBits = fpSize class MorganFPGeneratorTransformer(FpsGeneratorTransformer): - def __init__(self, nBits=2048, radius=2, useChirality=False, + _regenerate_on_properties = ("radius", "fpSize", "useChirality", "useFeatures", "useBondTypes") + + def __init__(self, fpSize=2048, radius=2, useChirality=False, useBondTypes=True, useFeatures=False, useCounts=False, - parallel: Union[bool, int] = False,): + parallel: Union[bool, int] = False, ): """Transform RDKit mols into Count or bit-based hashed MorganFingerprints Parameters ---------- - nBits : int, optional + fpsize : int, optional Size of the hashed fingerprint, by default 2048 radius : int, optional Radius of the fingerprint, by default 2 @@ -806,82 +801,34 @@ def __init__(self, nBits=2048, radius=2, useChirality=False, useCounts : bool, optional If toggled will create the count and not bit-based fingerprint, by default False """ + + self._initializing = True super().__init__(parallel = parallel) - self._useFeatures = useFeatures - self._useCounts = useCounts - self._useBondTypes = useBondTypes + self.fpSize = fpSize + self.radius = radius + self.useChirality = useChirality + self.useFeatures = useFeatures + self.useCounts = useCounts + self.useBondTypes = useBondTypes - self._generate_fp_generator(useFeatures=useFeatures, radius=radius, nBits=nBits, - useChirality=useChirality, useBondTypes=useBondTypes) + self._generate_fp_generator() + delattr(self, "_initializing") 
- def _generate_fp_generator(self, useFeatures:bool, radius:int, nBits:int, - useChirality:bool, useBondTypes:bool): + def _generate_fp_generator(self): - if useFeatures: + if self.useFeatures: atomInvariantsGenerator = GetMorganFeatureAtomInvGen() else: atomInvariantsGenerator = None - self._fpgen = GetMorganGenerator(radius=radius, - fpSize=nBits, - includeChirality=useChirality, - useBondTypes=useBondTypes, + self._fpgen = GetMorganGenerator(radius=self.radius, + fpSize=self.fpSize, + includeChirality=self.useChirality, + useBondTypes=self.useBondTypes, atomInvariantsGenerator=atomInvariantsGenerator, ) - @property - def radius(self): - return self._fpgen.GetOptions().radius - - @radius.setter - def radius(self, value:int): - self._fpgen.GetOptions().radius = value - - @property - def nBits(self): - return self._fpgen.GetOptions().fpSize - - @nBits.setter - def nBits(self, value:int): - self._fpgen.GetOptions().fpSize = value - - @property - def useChirality(self): - return self._fpgen.GetOptions().includeChirality - - @useChirality.setter - def useChirality(self, value:bool): - self._fpgen.GetOptions().includeChirality = value - - @property - def useFeatures(self): - return self._useFeatures - - @useFeatures.setter - def useFeatures(self, value:bool): - self._useFeatures = value - self._generate_fp_generator(useFeatures=self.useFeatures, radius=self.radius, nBits=self.nBits, - useChirality=self.useChirality, useBondTypes=self.useBondTypes) - - @property - def useBondTypes(self): - return self._useBondTypes - - @useBondTypes.setter - def useBondTypes(self, value:bool): - self._useBondTypes = value - self._generate_fp_generator(useFeatures=self.useFeatures, radius=self.radius, nBits=self.nBits, - useChirality=self.useChirality, useBondTypes=self.useBondTypes) - - @property - def useCounts(self): - return self._useCounts - - @useCounts.setter - def useCounts(self, value:bool): - self._useCounts = value - def _transform_mol(self, mol) -> np.array: if 
self.useCounts: return self._fpgen.GetCountFingerprintAsNumPy(mol) @@ -890,221 +837,81 @@ def _transform_mol(self, mol) -> np.array: class TopologicalTorsionFPGeneatorTransformer(FpsGeneratorTransformer): + _regenerate_on_properties = ("fpSize", "includeChirality", "targetSize") + def __init__(self, targetSize:int = 4, fromAtoms = None, ignoreAtoms = None, atomInvariants = None, confId=-1, - includeChirality:bool = False, nBits=2048, + includeChirality:bool = False, fpSize:int=2048, useCounts:bool=False, parallel: Union[bool, int] = False): + self._initializing = True super().__init__(parallel=parallel) - self._fromAtoms = fromAtoms - self._ignoreAtoms = ignoreAtoms - self._atomInvariants = atomInvariants - self._confId = confId - self._useCounts = useCounts - self._targetSize = targetSize - self._generate_fp_generator(targetSize=targetSize, includeChirality=includeChirality, - nBits=nBits) - - @property - def useCounts(self): - return self._useCounts - - @useCounts.setter - def useCounts(self, value:bool): - self._useCounts = value - - @property - def confId(self): - return self._confId - - @confId.setter - def confId(self, value: int): - self._confId = value - - @property - def fromAtoms(self): - return self._fromAtoms - - @fromAtoms.setter - def fromAtoms(self, value: int): - self._fromAtoms = value - - @property - def ignoreAtoms(self): - return self._ignoreAtoms - - @ignoreAtoms.setter - def ignoreAtoms(self, value: int): - self._ignoreAtoms = value - - @property - def atomInvariants(self): - return self._atomInvariants - - @atomInvariants.setter - def atomInvariants(self, value: int): - self._atomInvariants = value - - @property - def nBits(self): - return self._fpgen.GetOptions().fpSize - - @nBits.setter - def nBits(self, value: int): - self._fpgen.GetOptions().fpSize = value - - @property - def includeChirality(self): - return self._fpgen.GetOptions().includeChirality + self.fpSize = fpSize + self.includeChirality = includeChirality + self.targetSize = 
targetSize - @includeChirality.setter - def includeChirality(self, value:int): - self._fpgen.GetOptions().includeChirality = value + self.fromAtoms = fromAtoms + self.ignoreAtoms = ignoreAtoms + self.atomInvariants = atomInvariants + self.confId = confId + self.useCounts = useCounts - @property - def targetSize(self): - return self._targetSize + self._generate_fp_generator() + delattr(self, "_initializing") - @targetSize.setter - def targetSize(self, value:int): - self._targetSize = value - self._generate_fp_generator(targetSize=value, - includeChirality=self.includeChirality, - nBits=self.nBits) - def _generate_fp_generator(self, targetSize: int, includeChirality: bool, nBits: int): - self._fpgen = GetTopologicalTorsionGenerator(torsionAtomCount=targetSize, includeChirality=includeChirality, - fpSize=nBits) + def _generate_fp_generator(self): + self._fpgen = GetTopologicalTorsionGenerator(torsionAtomCount=self.targetSize, includeChirality=self.includeChirality, + fpSize=self.fpSize) def _transform_mol(self, mol) -> np.array: if self.useCounts: - return self._fpgen.GetCountFingerprintAsNumPy(mol, fromAtoms=self.fromAtoms, ignoreAtoms=self._ignoreAtoms, customAtomInvariants=self._atomInvariants) + return self._fpgen.GetCountFingerprintAsNumPy(mol, fromAtoms=self.fromAtoms, ignoreAtoms=self.ignoreAtoms, customAtomInvariants=self.atomInvariants) else: - return self._fpgen.GetFingerprintAsNumPy(mol, fromAtoms=self.fromAtoms, ignoreAtoms=self._ignoreAtoms, customAtomInvariants=self._atomInvariants) + return self._fpgen.GetFingerprintAsNumPy(mol, fromAtoms=self.fromAtoms, ignoreAtoms=self.ignoreAtoms, customAtomInvariants=self.atomInvariants) class AtomPairFPGeneratorTransformer(FpsGeneratorTransformer): + _regenerate_on_properties = ("fpSize", "includeChirality", "use2D", "minLength", "maxLength") + def __init__(self, minLength:int = 1, maxLength:int = 30, fromAtoms = None, ignoreAtoms = None, atomInvariants = None, - includeChirality:bool = False, use2D:bool = True, 
confId:int = -1, nBits=2048, + includeChirality:bool = False, use2D:bool = True, confId:int = -1, fpSize:int=2048, useCounts:bool=False, parallel: Union[bool, int] = False,): + self._initializing = True super().__init__(parallel = parallel) - self._useCounts= useCounts - self._confId = confId - self._fromAtoms = fromAtoms - self._ignoreAtoms = ignoreAtoms - self._atomInvariants = atomInvariants - self._minLength = minLength - self._maxLength = maxLength - - self._generate_fp_generator(minLength=minLength, maxLength=maxLength, - includeChirality=includeChirality, use2D=use2D, - nBits=nBits) - - @property - def useCounts(self): - return self._useCounts - - @useCounts.setter - def useCounts(self, value:bool): - self._useCounts = value - - @property - def confId(self): - return self._confId - - @confId.setter - def confId(self, value:int): - self._confId = value - - @property - def fromAtoms(self): - return self._fromAtoms - - @fromAtoms.setter - def fromAtoms(self, value:int): - self._fromAtoms = value - - @property - def ignoreAtoms(self): - return self._ignoreAtoms - - @ignoreAtoms.setter - def ignoreAtoms(self, value:int): - self._ignoreAtoms = value - - @property - def atomInvariants(self): - return self._atomInvariants - - @atomInvariants.setter - def atomInvariants(self, value:int): - self._atomInvariants = value - - @property - def minLength(self): - return self._minLength - - @minLength.setter - def minLength(self, value: int): - self._minLength = value - self._generate_fp_generator(minLength=value, maxLength=self.maxLength, - includeChirality=self.includeChirality, use2D=self.use2D, - nBits=self.nBits) - - @property - def maxLength(self): - return self._maxLength - - @maxLength.setter - def maxLength(self, value: int): - self._maxLength = value - self._generate_fp_generator(minLength=self.minLength, maxLength=value, - includeChirality=self.includeChirality, use2D=self.use2D, - nBits=self.nBits) - - @property - def includeChirality(self): - return 
self._fpgen.GetOptions().includeChirality - - @includeChirality.setter - def includeChirality(self, value: bool): - self._fpgen.GetOptions().includeChirality = value - - @property - def use2D(self): - return self._fpgen.GetOptions().use2D - - @use2D.setter - def use2D(self, value: bool): - self._fpgen.GetOptions().use2D = value - - @property - def nBits(self): - return self._fpgen.GetOptions().fpSize + self.fpSize = fpSize + self.use2D = use2D + self.includeChirality = includeChirality + self.minLength = minLength + self.maxLength = maxLength - @nBits.setter - def nBits(self, value: int): - self._fpgen.GetOptions().fpSize = value + self.useCounts= useCounts + self.confId = confId + self.fromAtoms = fromAtoms + self.ignoreAtoms = ignoreAtoms + self.atomInvariants = atomInvariants - @property - def nBitsPerEntry(self): - return self._fpgen.GetOptions().numBitsPerFeature + self._generate_fp_generator() + delattr(self, "_initializing") - def _generate_fp_generator(self, minLength, maxLength, includeChirality, use2D, nBits): - self._fpgen = GetAtomPairGenerator(minDistance=minLength, maxDistance=maxLength, - includeChirality=includeChirality, - use2D=use2D, fpSize=nBits) + def _generate_fp_generator(self): + self._fpgen = GetAtomPairGenerator(minDistance=self.minLength, maxDistance=self.maxLength, + includeChirality=self.includeChirality, + use2D=self.use2D, fpSize=self.fpSize) def _transform_mol(self, mol) -> np.array: if self.useCounts: - return self._fpgen.GetCountFingerprintAsNumPy(mol, fromAtoms=self.fromAtoms, ignoreAtoms=self._ignoreAtoms, customAtomInvariants=self._atomInvariants) + return self._fpgen.GetCountFingerprintAsNumPy(mol, fromAtoms=self.fromAtoms, ignoreAtoms=self.ignoreAtoms, customAtomInvariants=self.atomInvariants) else: - return self._fpgen.GetFingerprintAsNumPy(mol, fromAtoms=self.fromAtoms, ignoreAtoms=self._ignoreAtoms, customAtomInvariants=self._atomInvariants) + return self._fpgen.GetFingerprintAsNumPy(mol, fromAtoms=self.fromAtoms, 
ignoreAtoms=self.ignoreAtoms, customAtomInvariants=self.atomInvariants) class RDKitFPGeneratorTransformer(FpsGeneratorTransformer): + _regenerate_on_properties = ("minPath", "maxPath", "useHs", "branchedPaths", "useBondOrder", "countSimulation", "fpSize", "countBounds", + "numBitsPerFeature") + def __init__(self, minPath:int = 1, maxPath:int =7, useHs:bool = True, branchedPaths:bool = True, useBondOrder:bool = True, countSimulation:bool = False, countBounds = None, - nBits:int = 2048, numBitsPerFeature:int = 2, + fpSize:int = 2048, numBitsPerFeature:int = 2, useCounts:bool = False, parallel: Union[bool, int] = False ): """Calculates the RDKit fingerprints @@ -1125,88 +932,27 @@ def __init__(self, minPath:int = 1, maxPath:int =7, useHs:bool = True, branchedP if set, use count simulation while generating the fingerprint, by default False countBounds : _type_, optional boundaries for count simulation, corresponding bit will be set if the count is higher than the number provided for that spot, by default None - nBits : int, optional + fpSize : int, optional size of the generated fingerprint, does not affect the sparse versions, by default 2048 numBitsPerFeature : int, optional the number of bits set per path/subgraph found, by default 2 """ + self._initializing = True super().__init__(parallel = parallel) - self._useCounts = useCounts - self._countBounds = countBounds - self._generate_fp_generator( minPath=minPath, maxPath=maxPath, useHs=useHs, - branchedPaths=branchedPaths,useBondOrder=useBondOrder, - countSimulation=countSimulation, fpSize=nBits, - countBounds=countBounds, numBitsPerFeature=numBitsPerFeature) + self.minPath = minPath + self.maxPath = maxPath + self.useHs = useHs + self.branchedPaths = branchedPaths + self.useBondOrder = useBondOrder + self.countSimulation = countSimulation + self.fpSize = fpSize + self.numBitsPerFeature = numBitsPerFeature + self.countBounds = countBounds + self.useCounts = useCounts - @property - def nBits(self): - return 
self._fpgen.GetOptions().fpSize - @nBits.setter - def nBits(self, value: int): - self._fpgen.GetOptions().fpSize = value - @property - def minPath(self): - return self._fpgen.GetOptions().minPath - @minPath.setter - def minPath(self, value:int): - self._fpgen.GetOptions().minPath = value - @property - def maxPath(self): - return self._fpgen.GetOptions().maxPath - @maxPath.setter - def maxPath(self, value:int): - self._fpgen.GetOptions().maxPath = value - @property - def useHs(self): - return self._fpgen.GetOptions().useHs - @useHs.setter - def useHs(self, value:bool): - self._fpgen.GetOptions().useHs = value - @property - def branchedPaths(self): - return self._fpgen.GetOptions().branchedPaths - @branchedPaths.setter - def branchedPaths(self, value:int): - self._fpgen.GetOptions().branchedPaths = value - @property - def useBondOrder(self): - return self._fpgen.GetOptions().useBondOrder - @useBondOrder.setter - def useBondOrder(self, value:int): - self._fpgen.GetOptions().useBondOrder = value - @property - def numBitsPerFeature(self): - return self._fpgen.GetOptions().numBitsPerFeature - @numBitsPerFeature.setter - def numBitsPerFeature(self, value:int): - self._fpgen.GetOptions().numBitsPerFeature = value - @property - def countBounds(self): - return self._countBounds - @countBounds.setter - def countBounds(self, value:int): - self._countBounds = value - self._generate_fp_generator(minPath=self.minPath, maxPath=self.maxPath, useHs=self.useHs, - branchedPaths=self.branchedPaths,useBondOrder=self.useBondOrder, - countSimulation=self.countSimulation, fpSize=self.nBits, - countBounds=value, numBitsPerFeature=self.numBitsPerFeature) - @property - def countSimulation(self): - return self._countBounds - @countSimulation.setter - def countSimulation(self, value: bool): - self._countSimulation=value - self._generate_fp_generator(minPath=self.minPath, maxPath=self.maxPath, useHs=self.useHs, - branchedPaths=self.branchedPaths,useBondOrder=self.useBondOrder, - 
countSimulation=value, fpSize=self.nBits, - countBounds=self.countBounds, numBitsPerFeature=self.numBitsPerFeature) - @property - def useCounts(self): - return self._useCounts - @useCounts.setter - def useCounts(self, value:bool): - self._useCounts = value + self._generate_fp_generator() + delattr(self, "_initializing") def _transform_mol(self, mol) -> np.array: if self.useCounts: @@ -1214,10 +960,8 @@ def _transform_mol(self, mol) -> np.array: else: return self._fpgen.GetFingerprintAsNumPy(mol) - def _generate_fp_generator(self, minPath, maxPath, useHs, branchedPaths, - useBondOrder, countSimulation, fpSize, countBounds, - numBitsPerFeature): - self._fpgen = GetRDKitFPGenerator(minPath=minPath, maxPath=maxPath, useHs=useHs, - branchedPaths=branchedPaths,useBondOrder=useBondOrder, - countSimulation=countSimulation, fpSize=fpSize, - countBounds=countBounds, numBitsPerFeature=numBitsPerFeature) + def _generate_fp_generator(self): + self._fpgen = GetRDKitFPGenerator(minPath=self.minPath, maxPath=self.maxPath, useHs=self.useHs, + branchedPaths=self.branchedPaths,useBondOrder=self.useBondOrder, + countSimulation=self.countSimulation, fpSize=self.fpSize, + countBounds=self.countBounds, numBitsPerFeature=self.numBitsPerFeature) diff --git a/tests/test_fptransformers.py b/tests/test_fptransformers.py index 9a9c27a..4ad1e9d 100644 --- a/tests/test_fptransformers.py +++ b/tests/test_fptransformers.py @@ -131,34 +131,20 @@ def test_set_params( ]: params = t.get_params() # change extracted dictionary - params["nBits"] = 4242 + params["fpSize"] = 4242 # change params in transformer - t.set_params(nBits=4242) + t.set_params(fpSize=4242) # get parameters as dictionary and assert that it is the same params_2 = t.get_params() assert all([params[key] == params_2[key] for key in params.keys()]) - for t in [rdkit_transformer]: + for t in [rdkit_transformer, secfp_transformer, mhfp_transformer]: params = t.get_params() params["fpSize"] = 4242 t.set_params(fpSize=4242) params_2 = 
t.get_params() assert all([params[key] == params_2[key] for key in params.keys()]) - for t in [secfp_transformer]: - params = t.get_params() - params["length"] = 4242 - t.set_params(length=4242) - params_2 = t.get_params() - assert all([params[key] == params_2[key] for key in params.keys()]) - - for t in [mhfp_transformer]: - params = t.get_params() - params["n_permutations"] = 4242 - t.set_params(n_permutations=4242) - params_2 = t.get_params() - assert all([params[key] == params_2[key] for key in params.keys()]) - def test_transform( mols_container, @@ -183,21 +169,13 @@ def test_transform( avalon_transformer, ]: params = t.get_params() + print(type(t), params) fps = t.transform(mols_container) # Assert that the same length of input and output assert len(fps) == len(mols_container) # assert that the size of the fingerprint is the expected size - if ( - type(t) == type(maccs_transformer) - or type(t) == type(secfp_transformer) - or type(t) == type(mhfp_transformer) - ): - fpsize = t.nBits - elif type(t) == type(rdkit_transformer): - fpsize = params["fpSize"] - else: - fpsize = params["nBits"] + fpsize = params["fpSize"] assert len(fps[0]) == fpsize @@ -231,16 +209,7 @@ def test_transform_parallel( assert len(fps) == len(mols_container) # assert that the size of the fingerprint is the expected size - if ( - type(t) == type(maccs_transformer) - or type(t) == type(secfp_transformer) - or type(t) == type(mhfp_transformer) - ): - fpsize = t.nBits - elif type(t) == type(rdkit_transformer): - fpsize = params["fpSize"] - else: - fpsize = params["nBits"] + fpsize = params["fpSize"] assert len(fps[0]) == fpsize @@ -306,7 +275,7 @@ def assert_transformer_set_params(tr_class, new_params, mols_list): def test_morgan_set_params(chiral_mols_list): new_params = { - "nBits": 1024, + "fpSize": 1024, "radius": 1, "useBondTypes": False, # TODO, why doesn't this change the FP? 
"useChirality": True, @@ -328,7 +297,7 @@ def test_atompairs_set_params(chiral_mols_list): "includeChirality": True, "maxLength": 3, "minLength": 3, - "nBits": 1024, + "fpSize": 1024, "nBitsPerEntry": 3, #'use2D': True, #TODO, understand why this can't be set different "useCounts": True, @@ -344,7 +313,7 @@ def test_topologicaltorsion_set_params(chiral_mols_list): #'fromAtoms': 0, #'ignoreAtoms': 0, #'includeChirality': True, #TODO, figure out why this setting seems to give same FP wheter toggled or not - "nBits": 1024, + "fpSize": 1024, "nBitsPerEntry": 3, "targetSize": 5, "useCounts": True, @@ -376,7 +345,7 @@ def test_SECFingerprintTransformer(chiral_mols_list): new_params = { "isomeric": True, "kekulize": True, - "length": 1048, + "fpSize": 1048, "min_radius": 2, #'n_permutations': 2, # The SECFp is not using this setting "radius": 2, @@ -395,7 +364,7 @@ def test_MHFingerprintTransformer(chiral_mols_list): "isomeric": True, "kekulize": True, "min_radius": 2, - "n_permutations": 4096, + "fpSize": 4096, "seed": 44, } assert_transformer_set_params( @@ -405,7 +374,7 @@ def test_MHFingerprintTransformer(chiral_mols_list): def test_AvalonFingerprintTransformer(chiral_mols_list): new_params = { - "nBits": 1024, + "fpSize": 1024, "isQuery": True, # 'resetVect': True, #TODO: this doesn't change the FP "bitFlags": 32767, diff --git a/tests/test_fptransformersgenerator.py b/tests/test_fptransformersgenerator.py index f11ea95..81da19c 100644 --- a/tests/test_fptransformersgenerator.py +++ b/tests/test_fptransformersgenerator.py @@ -60,9 +60,9 @@ def test_set_params(transformer_class): transformer = transformer_class() params = transformer.get_params() #change extracted dictionary - params['nBits'] = 4242 + params['fpSize'] = 4242 #change params in transformer - transformer.set_params(nBits = 4242) + transformer.set_params(fpSize = 4242) # get parameters as dictionary and assert that it is the same params_2 = transformer.get_params() assert all([ params[key] == 
params_2[key] for key in params.keys()]) @@ -76,7 +76,7 @@ def test_transform(mols_container, transformer_class): #Assert that the same length of input and output assert len(fps) == len(mols_container) - fpsize = params['nBits'] + fpsize = params['fpSize'] assert len(fps[0]) == fpsize @@ -90,7 +90,7 @@ def test_transform_parallel(mols_container, transformer_class): #Assert that the same length of input and output assert len(fps) == len(mols_container) - fpsize = params['nBits'] + fpsize = params['fpSize'] assert len(fps[0]) == fpsize @@ -134,7 +134,7 @@ def assert_transformer_set_params(transfomer, new_params, mols_list): def test_morgan_set_params(chiral_mols_list): - new_params = {'nBits': 1024, + new_params = {'fpSize': 1024, 'radius': 1, 'useBondTypes': False,# TODO, why doesn't this change the FP? 'useChirality': True, @@ -153,7 +153,7 @@ def test_atompairs_set_params(chiral_mols_list): 'includeChirality': True, 'maxLength': 3, 'minLength': 3, - 'nBits': 1024, + 'fpSize': 1024, #'nBitsPerEntry': 3, #Todo: not setable with the generators? #'use2D': True, #TODO, understand why this can't be set different 'useCounts': True} @@ -166,7 +166,7 @@ def test_topologicaltorsion_set_params(chiral_mols_list): #'fromAtoms': 0, #'ignoreAtoms': 0, #'includeChirality': True, #TODO, figure out why this setting seems to give same FP wheter toggled or not - 'nBits': 1024, + 'fpSize': 1024, #'nBitsPerEntry': 3, #Todo: not setable with the generators? 'targetSize': 5, 'useCounts': True} @@ -178,7 +178,7 @@ def test_RDKitFPTransformer(chiral_mols_list): #'branchedPaths': False, #'countBounds': 0, #TODO: What does this do? 
'countSimulation': True, - 'nBits': 1024, + 'fpSize': 1024, 'maxPath': 3, 'minPath': 2, 'numBitsPerFeature': 3, diff --git a/tests/test_safeinferencemode.py b/tests/test_safeinferencemode.py index 921cc0f..c9b4ca1 100644 --- a/tests/test_safeinferencemode.py +++ b/tests/test_safeinferencemode.py @@ -104,12 +104,12 @@ def test_safeinference_wrapper_pandas_output( result = smiles_pipeline[:-1].fit_transform(X_smiles) assert isinstance(result, pd.DataFrame) assert result.shape[0] == len(X_smiles) - assert result.shape[1] == smiles_pipeline.named_steps["FP"].nBits + assert result.shape[1] == smiles_pipeline.named_steps["FP"].fpSize @skip_pandas_output_test def test_safeinference_wrapper_get_feature_names_out(smiles_pipeline): # Get feature names from the FP step feature_names = smiles_pipeline.named_steps["FP"].get_feature_names_out() - assert len(feature_names) == smiles_pipeline.named_steps["FP"].nBits + assert len(feature_names) == smiles_pipeline.named_steps["FP"].fpSize assert all(isinstance(name, str) for name in feature_names) diff --git a/tests/test_transformers.py b/tests/test_transformers.py index 143ecd3..fa65504 100644 --- a/tests/test_transformers.py +++ b/tests/test_transformers.py @@ -96,11 +96,12 @@ def test_transformer_pandas_output(SLC6A4_subset, pandas_output): X_transformed = pipeline.transform(X_smiles) assert isinstance(X_transformed, pd.DataFrame), f"the output of {FP_name} is not a pandas dataframe" assert X_transformed.shape[0] == len(X_smiles), f"the number of rows in the output of {FP_name} is not equal to the number of samples" - assert len(X_transformed.columns) == pipeline.named_steps["FP"].nBits, f"the number of columns in the output of {FP_name} is not equal to the number of bits" + assert len(X_transformed.columns) == pipeline.named_steps["FP"].fpSize, f"the number of columns in the output of {FP_name} is not equal to the number of bits" print(f"\nfitting and transforming completed") - except: + except Exception as err: print(f"\n!!!! 
FAILED pipeline fitting and transforming for {FP_name} with useCounts={useCounts}") + print("\n".join(err.args)) failed_FP.append(FP_name) pass @@ -136,7 +137,7 @@ def test_combined_transformer_pandas_out(combined_transformer, SLC6A4_subset_wit pipeline_skmol = combined_transformer.named_transformers_["pipeline-1"] featurizer_skmol = pipeline_skmol[-1] if isinstance(featurizer_skmol, FpsTransformer): - n_skmol_features = featurizer_skmol.nBits + n_skmol_features = featurizer_skmol.fpSize elif isinstance(featurizer_skmol, MolecularDescriptorTransformer): n_skmol_features = len(featurizer_skmol.desc_list) else: From d420cbde3ec5222c4ba36a4e3511b567101310a6 Mon Sep 17 00:00:00 2001 From: riesben Date: Thu, 14 Nov 2024 22:48:15 +0100 Subject: [PATCH 07/24] Remodelling transformers: - moving code around for easier oversight - adding nicer deprecation warnings. --- scikit_mol/fingerprints.py | 450 ++++++++++++++++++------------------- 1 file changed, 223 insertions(+), 227 deletions(-) diff --git a/scikit_mol/fingerprints.py b/scikit_mol/fingerprints.py index a96e65b..a6f90bc 100644 --- a/scikit_mol/fingerprints.py +++ b/scikit_mol/fingerprints.py @@ -2,9 +2,8 @@ import multiprocessing import re import inspect -from typing import Callable +from warnings import warn from typing import Union -from rdkit import Chem from rdkit import DataStructs # from rdkit.Chem.AllChem import GetMorganFingerprintAsBitVect @@ -33,7 +32,6 @@ r"^(?P\w+)FingerprintTransformer$" ) - class FpsTransformer(ABC, BaseEstimator, TransformerMixin): def __init__( self, @@ -220,200 +218,6 @@ def _mol2fp(self, mol): return rdMolDescriptors.GetMACCSKeysFingerprint(mol) -class RDKitFingerprintTransformer(FpsTransformer): - def __init__( - self, - minPath: int = 1, - maxPath: int = 7, - useHs: bool = True, - branchedPaths: bool = True, - useBondOrder: bool = True, - countSimulation: bool = False, - countBounds=None, - fpSize: int = 2048, - numBitsPerFeature: int = 2, - atomInvariantsGenerator=None, - 
parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - dtype: np.dtype = np.int8, - ): - """Calculates the RDKit fingerprints - - Parameters - ---------- - minPath : int, optional - the minimum path length (in bonds) to be included, by default 1 - maxPath : int, optional - the maximum path length (in bonds) to be included, by default 7 - useHs : bool, optional - toggles inclusion of Hs in paths (if the molecule has explicit Hs), by default True - branchedPaths : bool, optional - toggles generation of branched subgraphs, not just linear paths, by default True - useBondOrder : bool, optional - toggles inclusion of bond orders in the path hashes, by default True - countSimulation : bool, optional - if set, use count simulation while generating the fingerprint, by default False - countBounds : _type_, optional - boundaries for count simulation, corresponding bit will be set if the count is higher than the number provided for that spot, by default None - fpSize : int, optional - size of the generated fingerprint, does not affect the sparse versions, by default 2048 - numBitsPerFeature : int, optional - the number of bits set per path/subgraph found, by default 2 - atomInvariantsGenerator : _type_, optional - atom invariants to be used during fingerprint generation, by default None - """ - super().__init__( - parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype - ) - self.minPath = minPath - self.maxPath = maxPath - self.useHs = useHs - self.branchedPaths = branchedPaths - self.useBondOrder = useBondOrder - self.countSimulation = countSimulation - self.countBounds = countBounds - self.fpSize = fpSize - self.numBitsPerFeature = numBitsPerFeature - self.atomInvariantsGenerator = atomInvariantsGenerator - - - def _mol2fp(self, mol): - generator = rdFingerprintGenerator.GetRDKitFPGenerator( - minPath=int(self.minPath), - maxPath=int(self.maxPath), - useHs=bool(self.useHs), - branchedPaths=bool(self.branchedPaths), - 
useBondOrder=bool(self.useBondOrder), - countSimulation=bool(self.countSimulation), - countBounds=bool(self.countBounds), - fpSize=int(self.fpSize), - numBitsPerFeature=int(self.numBitsPerFeature), - atomInvariantsGenerator=self.atomInvariantsGenerator, - ) - return generator.GetFingerprint(mol) - - -class AtomPairFingerprintTransformer(FpsTransformer): - def __init__( - self, - minLength: int = 1, - maxLength: int = 30, - fromAtoms=0, - ignoreAtoms=0, - atomInvariants=0, - nBitsPerEntry: int = 4, - includeChirality: bool = False, - use2D: bool = True, - confId: int = -1, - fpSize=2048, - useCounts: bool = False, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - dtype: np.dtype = np.int8, - ): - super().__init__( - parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype - ) - self.minLength = minLength - self.maxLength = maxLength - self.fromAtoms = fromAtoms - self.ignoreAtoms = ignoreAtoms - self.atomInvariants = atomInvariants - self.includeChirality = includeChirality - self.use2D = use2D - self.confId = confId - self.fpSize = fpSize - self.nBitsPerEntry = nBitsPerEntry - self.useCounts = useCounts - - print("AtomPairFingerprintTransformer will be replace by AtomPairFPGeneratorTransformer, due to changes in RDKit!") - #raise DeprecationWarning("AtomPairFingerprintTransformer will be replace by AtomPairFPGeneratorTransformer, due to changes in RDKit!") - - - def _mol2fp(self, mol): - if self.useCounts: - return rdMolDescriptors.GetHashedAtomPairFingerprint( - mol, - nBits=int(self.fpSize), - minLength=int(self.minLength), - maxLength=int(self.maxLength), - fromAtoms=self.fromAtoms, - ignoreAtoms=self.ignoreAtoms, - atomInvariants=self.atomInvariants, - includeChirality=bool(self.includeChirality), - use2D=bool(self.use2D), - confId=int(self.confId), - ) - else: - return rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect( - mol, - nBits=int(self.fpSize), - minLength=int(self.minLength), - 
maxLength=int(self.maxLength), - fromAtoms=self.fromAtoms, - ignoreAtoms=self.ignoreAtoms, - atomInvariants=self.atomInvariants, - nBitsPerEntry=int(self.nBitsPerEntry), - includeChirality=bool(self.includeChirality), - use2D=bool(self.use2D), - confId=int(self.confId), - ) - - -class TopologicalTorsionFingerprintTransformer(FpsTransformer): - def __init__( - self, - targetSize: int = 4, - fromAtoms=0, - ignoreAtoms=0, - atomInvariants=0, - includeChirality: bool = False, - nBitsPerEntry: int = 4, - fpSize=2048, - useCounts: bool = False, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - dtype: np.dtype = np.int8, - ): - super().__init__( - parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype - ) - self.targetSize = targetSize - self.fromAtoms = fromAtoms - self.ignoreAtoms = ignoreAtoms - self.atomInvariants = atomInvariants - self.includeChirality = includeChirality - self.nBitsPerEntry = nBitsPerEntry - self.fpSize = fpSize - self.useCounts = useCounts - print("TopologicalTorsionFingerprintTransformer will be replace by TopologicalTorsionFPGeneatorTransformer, due to changes in RDKit!") - #raise DeprecationWarning("AtomPairFingerprintTransformer will be replace by AtomPairFPGeneratorTransformer, due to changes in RDKit!") - - - def _mol2fp(self, mol): - if self.useCounts: - return rdMolDescriptors.GetHashedTopologicalTorsionFingerprint( - mol, - nBits=int(self.fpSize), - targetSize=int(self.targetSize), - fromAtoms=self.fromAtoms, - ignoreAtoms=self.ignoreAtoms, - atomInvariants=self.atomInvariants, - includeChirality=bool(self.includeChirality), - ) - else: - return rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect( - mol, - nBits=int(self.fpSize), - targetSize=int(self.targetSize), - fromAtoms=self.fromAtoms, - ignoreAtoms=self.ignoreAtoms, - atomInvariants=self.atomInvariants, - includeChirality=bool(self.includeChirality), - nBitsPerEntry=int(self.nBitsPerEntry), - ) - - class 
MHFingerprintTransformer(FpsTransformer): def __init__( self, @@ -602,6 +406,61 @@ def length(self): return self.fpSize +class AvalonFingerprintTransformer(FpsTransformer): + # Fingerprint from the Avalon toolkeit, https://doi.org/10.1021/ci050413p + def __init__( + self, + fpSize: int = 512, + isQuery: bool = False, + resetVect: bool = False, + bitFlags: int = 15761407, + useCounts: bool = False, + parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, + dtype: np.dtype = np.int8, + ): + """Transform RDKit mols into Count or bit-based Avalon Fingerprints + + Parameters + ---------- + fpSize : int, optional + Size of the fingerprint, by default 512 + isQuery : bool, optional + use the fingerprint for a query structure, by default False + resetVect : bool, optional + reset vector, by default False NB: only used in GetAvalonFP (not for GetAvalonCountFP) + bitFlags : int, optional + Substructure fingerprint (32767) or similarity fingerprint (15761407) by default 15761407 + useCounts : bool, optional + If toggled will create the count and not bit-based fingerprint, by default False + """ + super().__init__( + parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype + ) + self.fpSize = fpSize + self.isQuery = isQuery + self.resetVect = resetVect + self.bitFlags = bitFlags + self.useCounts = useCounts + + def _mol2fp(self, mol): + if self.useCounts: + return pyAvalonTools.GetAvalonCountFP( + mol, + nBits=int(self.fpSize), + isQuery=bool(self.isQuery), + bitFlags=int(self.bitFlags), + ) + else: + return pyAvalonTools.GetAvalonFP( + mol, + nBits=int(self.fpSize), + isQuery=bool(self.isQuery), + resetVect=bool(self.resetVect), + bitFlags=int(self.bitFlags), + ) + + class MorganFingerprintTransformer(FpsTransformer): def __init__( self, @@ -642,9 +501,7 @@ def __init__( self.useFeatures = useFeatures self.useCounts = useCounts - print("MorganFingerprintTransformer will be replace by MorganGeneratorTransformer, due to changes in RDKit!") - 
#raise DeprecationWarning("MorganFingerprintTransformer will be replace by MorganFPGeneratorTransformer, due to changes in RDKit!") - + warn("MorganFingerprintTransformer will be replace by MorganGeneratorTransformer, due to changes in RDKit!", DeprecationWarning) def _mol2fp(self, mol): if self.useCounts: @@ -667,58 +524,196 @@ def _mol2fp(self, mol): ) -class AvalonFingerprintTransformer(FpsTransformer): - # Fingerprint from the Avalon toolkeit, https://doi.org/10.1021/ci050413p +class RDKitFingerprintTransformer(FpsTransformer): def __init__( self, - fpSize: int = 512, - isQuery: bool = False, - resetVect: bool = False, - bitFlags: int = 15761407, - useCounts: bool = False, + minPath: int = 1, + maxPath: int = 7, + useHs: bool = True, + branchedPaths: bool = True, + useBondOrder: bool = True, + countSimulation: bool = False, + countBounds=None, + fpSize: int = 2048, + numBitsPerFeature: int = 2, + atomInvariantsGenerator=None, parallel: Union[bool, int] = False, safe_inference_mode: bool = False, dtype: np.dtype = np.int8, ): - """Transform RDKit mols into Count or bit-based Avalon Fingerprints + """Calculates the RDKit fingerprints Parameters ---------- + minPath : int, optional + the minimum path length (in bonds) to be included, by default 1 + maxPath : int, optional + the maximum path length (in bonds) to be included, by default 7 + useHs : bool, optional + toggles inclusion of Hs in paths (if the molecule has explicit Hs), by default True + branchedPaths : bool, optional + toggles generation of branched subgraphs, not just linear paths, by default True + useBondOrder : bool, optional + toggles inclusion of bond orders in the path hashes, by default True + countSimulation : bool, optional + if set, use count simulation while generating the fingerprint, by default False + countBounds : _type_, optional + boundaries for count simulation, corresponding bit will be set if the count is higher than the number provided for that spot, by default None fpSize : int, 
optional - Size of the fingerprint, by default 512 - isQuery : bool, optional - use the fingerprint for a query structure, by default False - resetVect : bool, optional - reset vector, by default False NB: only used in GetAvalonFP (not for GetAvalonCountFP) - bitFlags : int, optional - Substructure fingerprint (32767) or similarity fingerprint (15761407) by default 15761407 - useCounts : bool, optional - If toggled will create the count and not bit-based fingerprint, by default False + size of the generated fingerprint, does not affect the sparse versions, by default 2048 + numBitsPerFeature : int, optional + the number of bits set per path/subgraph found, by default 2 + atomInvariantsGenerator : _type_, optional + atom invariants to be used during fingerprint generation, by default None """ super().__init__( parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype ) + self.minPath = minPath + self.maxPath = maxPath + self.useHs = useHs + self.branchedPaths = branchedPaths + self.useBondOrder = useBondOrder + self.countSimulation = countSimulation + self.countBounds = countBounds self.fpSize = fpSize - self.isQuery = isQuery - self.resetVect = resetVect - self.bitFlags = bitFlags + self.numBitsPerFeature = numBitsPerFeature + self.atomInvariantsGenerator = atomInvariantsGenerator + + warn("RDKitFingerprintTransformer will be replace by RDKitFPGeneratorTransformer, due to changes in RDKit!", DeprecationWarning) + + + def _mol2fp(self, mol): + generator = rdFingerprintGenerator.GetRDKitFPGenerator( + minPath=int(self.minPath), + maxPath=int(self.maxPath), + useHs=bool(self.useHs), + branchedPaths=bool(self.branchedPaths), + useBondOrder=bool(self.useBondOrder), + countSimulation=bool(self.countSimulation), + countBounds=bool(self.countBounds), + fpSize=int(self.fpSize), + numBitsPerFeature=int(self.numBitsPerFeature), + atomInvariantsGenerator=self.atomInvariantsGenerator, + ) + return generator.GetFingerprint(mol) + + +class 
AtomPairFingerprintTransformer(FpsTransformer): + def __init__( + self, + minLength: int = 1, + maxLength: int = 30, + fromAtoms=0, + ignoreAtoms=0, + atomInvariants=0, + nBitsPerEntry: int = 4, + includeChirality: bool = False, + use2D: bool = True, + confId: int = -1, + fpSize=2048, + useCounts: bool = False, + parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, + dtype: np.dtype = np.int8, + ): + super().__init__( + parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype + ) + self.minLength = minLength + self.maxLength = maxLength + self.fromAtoms = fromAtoms + self.ignoreAtoms = ignoreAtoms + self.atomInvariants = atomInvariants + self.includeChirality = includeChirality + self.use2D = use2D + self.confId = confId + self.fpSize = fpSize + self.nBitsPerEntry = nBitsPerEntry self.useCounts = useCounts + warn("AtomPairFingerprintTransformer will be replace by AtomPairFPGeneratorTransformer, due to changes in RDKit!", DeprecationWarning) + def _mol2fp(self, mol): if self.useCounts: - return pyAvalonTools.GetAvalonCountFP( + return rdMolDescriptors.GetHashedAtomPairFingerprint( mol, nBits=int(self.fpSize), - isQuery=bool(self.isQuery), - bitFlags=int(self.bitFlags), + minLength=int(self.minLength), + maxLength=int(self.maxLength), + fromAtoms=self.fromAtoms, + ignoreAtoms=self.ignoreAtoms, + atomInvariants=self.atomInvariants, + includeChirality=bool(self.includeChirality), + use2D=bool(self.use2D), + confId=int(self.confId), ) else: - return pyAvalonTools.GetAvalonFP( + return rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect( mol, nBits=int(self.fpSize), - isQuery=bool(self.isQuery), - resetVect=bool(self.resetVect), - bitFlags=int(self.bitFlags), + minLength=int(self.minLength), + maxLength=int(self.maxLength), + fromAtoms=self.fromAtoms, + ignoreAtoms=self.ignoreAtoms, + atomInvariants=self.atomInvariants, + nBitsPerEntry=int(self.nBitsPerEntry), + includeChirality=bool(self.includeChirality), + use2D=bool(self.use2D), 
+ confId=int(self.confId), + ) + + +class TopologicalTorsionFingerprintTransformer(FpsTransformer): + def __init__( + self, + targetSize: int = 4, + fromAtoms=0, + ignoreAtoms=0, + atomInvariants=0, + includeChirality: bool = False, + nBitsPerEntry: int = 4, + fpSize=2048, + useCounts: bool = False, + parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, + dtype: np.dtype = np.int8, + ): + super().__init__( + parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype + ) + self.targetSize = targetSize + self.fromAtoms = fromAtoms + self.ignoreAtoms = ignoreAtoms + self.atomInvariants = atomInvariants + self.includeChirality = includeChirality + self.nBitsPerEntry = nBitsPerEntry + self.fpSize = fpSize + self.useCounts = useCounts + + warn("TopologicalTorsionFingerprintTransformer will be replace by TopologicalTorsionFPGeneatorTransformer, due to changes in RDKit!", DeprecationWarning) + + def _mol2fp(self, mol): + if self.useCounts: + return rdMolDescriptors.GetHashedTopologicalTorsionFingerprint( + mol, + nBits=int(self.fpSize), + targetSize=int(self.targetSize), + fromAtoms=self.fromAtoms, + ignoreAtoms=self.ignoreAtoms, + atomInvariants=self.atomInvariants, + includeChirality=bool(self.includeChirality), + ) + else: + return rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect( + mol, + nBits=int(self.fpSize), + targetSize=int(self.targetSize), + fromAtoms=self.fromAtoms, + ignoreAtoms=self.ignoreAtoms, + atomInvariants=self.atomInvariants, + includeChirality=bool(self.includeChirality), + nBitsPerEntry=int(self.nBitsPerEntry), ) @@ -732,6 +727,7 @@ def parallel_helper(args): transformer = getattr(fingerprints, classname)(**parameters) return transformer._transform(X_mols) + class FpsGeneratorTransformer(FpsTransformer): _regenerate_on_properties = () From f7d2958b91f24930288f3c5a68c7bdb23a80a633 Mon Sep 17 00:00:00 2001 From: riesben Date: Fri, 15 Nov 2024 07:03:12 +0100 Subject: [PATCH 08/24] Remodelling 
transformers: - add new generator functions to transformer test --- tests/test_transformers.py | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/tests/test_transformers.py b/tests/test_transformers.py index fa65504..b96d421 100644 --- a/tests/test_transformers.py +++ b/tests/test_transformers.py @@ -15,9 +15,11 @@ from sklearn.ensemble import RandomForestRegressor from scikit_mol.conversions import SmilesToMolTransformer from scikit_mol.core import SKLEARN_VERSION_PANDAS_OUT -from scikit_mol.fingerprints import FpsTransformer, MACCSKeysFingerprintTransformer, RDKitFingerprintTransformer, AtomPairFingerprintTransformer, \ - TopologicalTorsionFingerprintTransformer, MorganFingerprintTransformer, SECFingerprintTransformer, \ - MHFingerprintTransformer, AvalonFingerprintTransformer +from scikit_mol.fingerprints import (FpsTransformer, MACCSKeysFingerprintTransformer, RDKitFingerprintTransformer, AtomPairFingerprintTransformer, + TopologicalTorsionFingerprintTransformer, MorganFingerprintTransformer, SECFingerprintTransformer, + MHFingerprintTransformer, AvalonFingerprintTransformer, MorganFPGeneratorTransformer, + RDKitFPGeneratorTransformer, AtomPairFPGeneratorTransformer, TopologicalTorsionFPGeneatorTransformer) + from scikit_mol.descriptors import MolecularDescriptorTransformer from fixtures import SLC6A4_subset, SLC6A4_subset_with_cddd, skip_pandas_output_test, mols_container, featurizer, combined_transformer @@ -29,6 +31,9 @@ def test_transformer(SLC6A4_subset): X_train, X_test = X_smiles[:128], X_smiles[128:] Y_train, Y_test = Y[:128], Y[128:] + MorganFPGeneratorTransformer, + RDKitFPGeneratorTransformer, AtomPairFPGeneratorTransformer, TopologicalTorsionFPGeneatorTransformer + # run FP with default parameters except when useCounts can be given as an argument FP_dict = {"MACCSTransformer": [MACCSKeysFingerprintTransformer, None], "RDKitFPTransformer": [RDKitFingerprintTransformer, None], @@ -40,7 +45,15 @@ 
def test_transformer(SLC6A4_subset): "MorganTransformer useCounts": [MorganFingerprintTransformer, True], "SECFingerprintTransformer": [SECFingerprintTransformer, None], "MHFingerprintTransformer": [MHFingerprintTransformer, None], - 'AvalonFingerprintTransformer': [AvalonFingerprintTransformer, None]} + 'AvalonFingerprintTransformer': [AvalonFingerprintTransformer, None], + 'MorganFPGeneratorTransformer': [MorganFPGeneratorTransformer, True], + 'MorganFPGeneratorTransformer': [MorganFPGeneratorTransformer, False], + 'RDKitFPGeneratorTransformer': [RDKitFPGeneratorTransformer, None], + 'AtomPairFPGeneratorTransformer': [AtomPairFPGeneratorTransformer, True], + 'AtomPairFPGeneratorTransformer': [ AtomPairFPGeneratorTransformer, False], + 'TopologicalTorsionFPGeneatorTransformer': [TopologicalTorsionFPGeneatorTransformer, True], + 'TopologicalTorsionFPGeneatorTransformer': [ TopologicalTorsionFPGeneatorTransformer, False], + } # fit on toy data and print train/test score if successful or collect the failed FP failed_FP = [] @@ -81,7 +94,22 @@ def test_transformer_pandas_output(SLC6A4_subset, pandas_output): "MorganTransformer useCounts": [MorganFingerprintTransformer, True], "SECFingerprintTransformer": [SECFingerprintTransformer, None], "MHFingerprintTransformer": [MHFingerprintTransformer, None], - 'AvalonFingerprintTransformer': [AvalonFingerprintTransformer, None]} + 'AvalonFingerprintTransformer': [AvalonFingerprintTransformer, None], + 'MorganFPGeneratorTransformer': [MorganFPGeneratorTransformer, + True], + 'MorganFPGeneratorTransformer': [MorganFPGeneratorTransformer, + False], + 'RDKitFPGeneratorTransformer': [RDKitFPGeneratorTransformer, + None], + 'AtomPairFPGeneratorTransformer': [ + AtomPairFPGeneratorTransformer, True], + 'AtomPairFPGeneratorTransformer': [ + AtomPairFPGeneratorTransformer, False], + 'TopologicalTorsionFPGeneatorTransformer': [ + TopologicalTorsionFPGeneatorTransformer, True], + 'TopologicalTorsionFPGeneatorTransformer': [ + 
TopologicalTorsionFPGeneatorTransformer, False], + } # fit on toy data and check that the output is a pandas dataframe failed_FP = [] From 5ae6a2b326f9a47fcd6174be0b84cae9eeaccab4 Mon Sep 17 00:00:00 2001 From: riesben Date: Fri, 15 Nov 2024 07:16:15 +0100 Subject: [PATCH 09/24] Remodelling transformers: - add DeprecationWarnings to not harmonized fpSize bits. --- scikit_mol/fingerprints.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/scikit_mol/fingerprints.py b/scikit_mol/fingerprints.py index a6f90bc..bea43e0 100644 --- a/scikit_mol/fingerprints.py +++ b/scikit_mol/fingerprints.py @@ -45,6 +45,17 @@ def __init__( self.safe_inference_mode = safe_inference_mode self.dtype = dtype + + @property + def nBits(self): + warn("nBits will be replace by fpSize, due to changes harmonization!", DeprecationWarning) + return self.fpSize + + @nBits.setter + def nBits(self, nBits): + warn("nBits will be replace by fpSize, due to changes harmonization!", DeprecationWarning) + self.fpSize = nBits + def _get_column_prefix(self) -> str: matched = _PATTERN_FINGERPRINT_TRANSFORMER.match(type(self).__name__) if matched: @@ -299,10 +310,12 @@ def seed(self, seed): @property def n_permutations(self): + warn("n_permutations will be replace by fpSize, due to changes harmonization!", DeprecationWarning) return self.fpSize @n_permutations.setter def n_permutations(self, n_permutations): + warn("n_permutations will be replace by fpSize, due to changes harmonization!", DeprecationWarning) self.fpSize = n_permutations # each time the n_permutations parameter is modified refresh an instance of the encoder self._recreate_encoder() @@ -402,7 +415,7 @@ def n_permutations(self, n_permutations): @property def length(self): - # to be compliant with the requirement of the base class + warn("length will be replace by fpSize, due to changes harmonization!", DeprecationWarning) return self.fpSize From 5f91e0cf5edd9d4e27559399e4c5d1de6985e235 Mon Sep 17 00:00:00 2001 
From: Esben Jannik Bjerrum Date: Fri, 22 Nov 2024 14:42:11 +0100 Subject: [PATCH 10/24] Preparing file split --- scikit_mol/{fingerprints.py => fingerprints/baseclasses.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scikit_mol/{fingerprints.py => fingerprints/baseclasses.py} (100%) diff --git a/scikit_mol/fingerprints.py b/scikit_mol/fingerprints/baseclasses.py similarity index 100% rename from scikit_mol/fingerprints.py rename to scikit_mol/fingerprints/baseclasses.py From f7b20f173136e95f1c363b85fdb01eb3afafd26d Mon Sep 17 00:00:00 2001 From: Esben Jannik Bjerrum Date: Fri, 22 Nov 2024 14:44:44 +0100 Subject: [PATCH 11/24] Split fingerprint file into smaller for better overview --- scikit_mol/fingerprints/__init__.py | 15 + scikit_mol/fingerprints/atompair.py | 144 ++++ scikit_mol/fingerprints/avalon.py | 62 ++ scikit_mol/fingerprints/baseclasses.py | 784 +----------------- scikit_mol/fingerprints/maccs.py | 41 + scikit_mol/fingerprints/minhash.py | 206 +++++ scikit_mol/fingerprints/morgan.py | 150 ++++ scikit_mol/fingerprints/rdkitfp.py | 175 ++++ scikit_mol/fingerprints/topologicaltorsion.py | 120 +++ 9 files changed, 950 insertions(+), 747 deletions(-) create mode 100644 scikit_mol/fingerprints/__init__.py create mode 100644 scikit_mol/fingerprints/atompair.py create mode 100644 scikit_mol/fingerprints/avalon.py create mode 100644 scikit_mol/fingerprints/maccs.py create mode 100644 scikit_mol/fingerprints/minhash.py create mode 100644 scikit_mol/fingerprints/morgan.py create mode 100644 scikit_mol/fingerprints/rdkitfp.py create mode 100644 scikit_mol/fingerprints/topologicaltorsion.py diff --git a/scikit_mol/fingerprints/__init__.py b/scikit_mol/fingerprints/__init__.py new file mode 100644 index 0000000..5ed655d --- /dev/null +++ b/scikit_mol/fingerprints/__init__.py @@ -0,0 +1,15 @@ +from .baseclasses import ( + FpsTransformer, + FpsGeneratorTransformer, +) # TODO, for backwards compatibility with tests, needs to be removed + +from .atompair 
import AtomPairFingerprintTransformer, AtomPairFPGeneratorTransformer +from .avalon import AvalonFingerprintTransformer +from .maccs import MACCSKeysFingerprintTransformer +from .minhash import MHFingerprintTransformer, SECFingerprintTransformer +from .morgan import MorganFingerprintTransformer, MorganFPGeneratorTransformer +from .rdkitfp import RDKitFingerprintTransformer, RDKitFPGeneratorTransformer +from .topologicaltorsion import ( + TopologicalTorsionFingerprintTransformer, + TopologicalTorsionFPGeneatorTransformer, +) diff --git a/scikit_mol/fingerprints/atompair.py b/scikit_mol/fingerprints/atompair.py new file mode 100644 index 0000000..aff8f9f --- /dev/null +++ b/scikit_mol/fingerprints/atompair.py @@ -0,0 +1,144 @@ +from typing import Union + +import numpy as np + +from warnings import warn + +from .baseclasses import FpsTransformer, FpsGeneratorTransformer + +from rdkit.Chem.rdFingerprintGenerator import GetAtomPairGenerator +from rdkit.Chem import rdMolDescriptors + + +class AtomPairFingerprintTransformer(FpsTransformer): + def __init__( + self, + minLength: int = 1, + maxLength: int = 30, + fromAtoms=0, + ignoreAtoms=0, + atomInvariants=0, + nBitsPerEntry: int = 4, + includeChirality: bool = False, + use2D: bool = True, + confId: int = -1, + fpSize=2048, + useCounts: bool = False, + parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, + dtype: np.dtype = np.int8, + ): + super().__init__( + parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype + ) + self.minLength = minLength + self.maxLength = maxLength + self.fromAtoms = fromAtoms + self.ignoreAtoms = ignoreAtoms + self.atomInvariants = atomInvariants + self.includeChirality = includeChirality + self.use2D = use2D + self.confId = confId + self.fpSize = fpSize + self.nBitsPerEntry = nBitsPerEntry + self.useCounts = useCounts + + warn( + "AtomPairFingerprintTransformer will be replace by AtomPairFPGeneratorTransformer, due to changes in RDKit!", + 
DeprecationWarning, + ) + + def _mol2fp(self, mol): + if self.useCounts: + return rdMolDescriptors.GetHashedAtomPairFingerprint( + mol, + nBits=int(self.fpSize), + minLength=int(self.minLength), + maxLength=int(self.maxLength), + fromAtoms=self.fromAtoms, + ignoreAtoms=self.ignoreAtoms, + atomInvariants=self.atomInvariants, + includeChirality=bool(self.includeChirality), + use2D=bool(self.use2D), + confId=int(self.confId), + ) + else: + return rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect( + mol, + nBits=int(self.fpSize), + minLength=int(self.minLength), + maxLength=int(self.maxLength), + fromAtoms=self.fromAtoms, + ignoreAtoms=self.ignoreAtoms, + atomInvariants=self.atomInvariants, + nBitsPerEntry=int(self.nBitsPerEntry), + includeChirality=bool(self.includeChirality), + use2D=bool(self.use2D), + confId=int(self.confId), + ) + + +class AtomPairFPGeneratorTransformer(FpsGeneratorTransformer): + _regenerate_on_properties = ( + "fpSize", + "includeChirality", + "use2D", + "minLength", + "maxLength", + ) + + def __init__( + self, + minLength: int = 1, + maxLength: int = 30, + fromAtoms=None, + ignoreAtoms=None, + atomInvariants=None, + includeChirality: bool = False, + use2D: bool = True, + confId: int = -1, + fpSize: int = 2048, + useCounts: bool = False, + parallel: Union[bool, int] = False, + ): + self._initializing = True + super().__init__(parallel=parallel) + self.fpSize = fpSize + self.use2D = use2D + self.includeChirality = includeChirality + self.minLength = minLength + self.maxLength = maxLength + + self.useCounts = useCounts + self.confId = confId + self.fromAtoms = fromAtoms + self.ignoreAtoms = ignoreAtoms + self.atomInvariants = atomInvariants + + self._generate_fp_generator() + delattr(self, "_initializing") + + def _generate_fp_generator(self): + self._fpgen = GetAtomPairGenerator( + minDistance=self.minLength, + maxDistance=self.maxLength, + includeChirality=self.includeChirality, + use2D=self.use2D, + fpSize=self.fpSize, + ) + + def 
_transform_mol(self, mol) -> np.array: + if self.useCounts: + return self._fpgen.GetCountFingerprintAsNumPy( + mol, + fromAtoms=self.fromAtoms, + ignoreAtoms=self.ignoreAtoms, + customAtomInvariants=self.atomInvariants, + ) + else: + return self._fpgen.GetFingerprintAsNumPy( + mol, + fromAtoms=self.fromAtoms, + ignoreAtoms=self.ignoreAtoms, + customAtomInvariants=self.atomInvariants, + ) diff --git a/scikit_mol/fingerprints/avalon.py b/scikit_mol/fingerprints/avalon.py new file mode 100644 index 0000000..074632d --- /dev/null +++ b/scikit_mol/fingerprints/avalon.py @@ -0,0 +1,62 @@ +from typing import Union + +import numpy as np + +from .baseclasses import FpsTransformer + +from rdkit.Avalon import pyAvalonTools + + +class AvalonFingerprintTransformer(FpsTransformer): + # Fingerprint from the Avalon toolkeit, https://doi.org/10.1021/ci050413p + def __init__( + self, + fpSize: int = 512, + isQuery: bool = False, + resetVect: bool = False, + bitFlags: int = 15761407, + useCounts: bool = False, + parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, + dtype: np.dtype = np.int8, + ): + """Transform RDKit mols into Count or bit-based Avalon Fingerprints + + Parameters + ---------- + fpSize : int, optional + Size of the fingerprint, by default 512 + isQuery : bool, optional + use the fingerprint for a query structure, by default False + resetVect : bool, optional + reset vector, by default False NB: only used in GetAvalonFP (not for GetAvalonCountFP) + bitFlags : int, optional + Substructure fingerprint (32767) or similarity fingerprint (15761407) by default 15761407 + useCounts : bool, optional + If toggled will create the count and not bit-based fingerprint, by default False + """ + super().__init__( + parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype + ) + self.fpSize = fpSize + self.isQuery = isQuery + self.resetVect = resetVect + self.bitFlags = bitFlags + self.useCounts = useCounts + + def _mol2fp(self, mol): + if 
self.useCounts: + return pyAvalonTools.GetAvalonCountFP( + mol, + nBits=int(self.fpSize), + isQuery=bool(self.isQuery), + bitFlags=int(self.bitFlags), + ) + else: + return pyAvalonTools.GetAvalonFP( + mol, + nBits=int(self.fpSize), + isQuery=bool(self.isQuery), + resetVect=bool(self.resetVect), + bitFlags=int(self.bitFlags), + ) diff --git a/scikit_mol/fingerprints/baseclasses.py b/scikit_mol/fingerprints/baseclasses.py index bea43e0..ce28e18 100644 --- a/scikit_mol/fingerprints/baseclasses.py +++ b/scikit_mol/fingerprints/baseclasses.py @@ -10,12 +10,15 @@ from rdkit.Chem import rdMolDescriptors from rdkit.Chem import rdFingerprintGenerator from rdkit.Chem import rdMHFPFingerprint -from rdkit.Avalon import pyAvalonTools -from rdkit.Chem.rdFingerprintGenerator import (GetMorganGenerator, GetMorganFeatureAtomInvGen, - GetTopologicalTorsionGenerator, - GetAtomPairGenerator, - GetRDKitFPGenerator) + +from rdkit.Chem.rdFingerprintGenerator import ( + GetMorganGenerator, + GetMorganFeatureAtomInvGen, + GetTopologicalTorsionGenerator, + GetAtomPairGenerator, + GetRDKitFPGenerator, +) import numpy as np import pandas as pd @@ -32,6 +35,7 @@ r"^(?P\w+)FingerprintTransformer$" ) + class FpsTransformer(ABC, BaseEstimator, TransformerMixin): def __init__( self, @@ -45,15 +49,20 @@ def __init__( self.safe_inference_mode = safe_inference_mode self.dtype = dtype - @property def nBits(self): - warn("nBits will be replace by fpSize, due to changes harmonization!", DeprecationWarning) + warn( + "nBits will be replace by fpSize, due to changes harmonization!", + DeprecationWarning, + ) return self.fpSize @nBits.setter def nBits(self, nBits): - warn("nBits will be replace by fpSize, due to changes harmonization!", DeprecationWarning) + warn( + "nBits will be replace by fpSize, due to changes harmonization!", + DeprecationWarning, + ) self.fpSize = nBits def _get_column_prefix(self) -> str: @@ -193,554 +202,6 @@ def transform(self, X, y=None): return arr -class 
MACCSKeysFingerprintTransformer(FpsTransformer): - def __init__( - self, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - dtype: np.dtype = np.int8, - fpSize=167, - ): - """MACCS keys fingerprinter - calculates the 167 fixed MACCS keys - """ - super().__init__( - parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype - ) - if fpSize != 167: - raise ValueError( - "fpSize can only be 167, matching the number of defined MACCS keys!" - ) - self._fpSize = fpSize - - @property - def fpSize(self): - return self._fpSize - - @fpSize.setter - def fpSize(self, fpSize): - if fpSize != 167: - raise ValueError( - "fpSize can only be 167, matching the number of defined MACCS keys!" - ) - self._fpSize = fpSize - - def _mol2fp(self, mol): - return rdMolDescriptors.GetMACCSKeysFingerprint(mol) - - -class MHFingerprintTransformer(FpsTransformer): - def __init__( - self, - radius: int = 3, - rings: bool = True, - isomeric: bool = False, - kekulize: bool = False, - min_radius: int = 1, - fpSize: int = 2048, - seed: int = 42, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - dtype: np.dtype = np.int32, - ): - """Transforms the RDKit mol into the MinHash fingerprint (MHFP) - - https://jcheminf.biomedcentral.com/articles/10.1186/s13321-018-0321-8 - - Args: - radius (int, optional): The MHFP radius. Defaults to 3. - rings (bool, optional): Whether or not to include rings in the shingling. Defaults to True. - isomeric (bool, optional): Whether the isomeric SMILES to be considered. Defaults to False. - kekulize (bool, optional): Whether or not to kekulize the extracted SMILES. Defaults to False. - min_radius (int, optional): The minimum radius that is used to extract n-gram. Defaults to 1. - fpSize (int, optional): The number of permutations used for hashing. Defaults to 2048, - this is effectively the length of the FP - seed (int, optional): The value used to seed numpy.random. Defaults to 0. 
- """ - super().__init__( - parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype - ) - self.radius = radius - self.rings = rings - self.isomeric = isomeric - self.kekulize = kekulize - self.min_radius = min_radius - # Set the .n_permutations and .seed without creating the encoder twice - self.fpSize = fpSize - self._seed = seed - # create the encoder instance - self._recreate_encoder() - - def __getstate__(self): - # Get the state of the parent class - state = super().__getstate__() - # Remove the unpicklable property from the state - state.pop("mhfp_encoder", None) # mhfp_encoder is not picklable - return state - - def __setstate__(self, state): - # Restore the state of the parent class - super().__setstate__(state) - # Re-create the unpicklable property - self._recreate_encoder() - - def _mol2fp(self, mol): - fp = self.mhfp_encoder.EncodeMol( - mol, self.radius, self.rings, self.isomeric, self.kekulize, self.min_radius - ) - return fp - - def _fp2array(self, fp): - return np.array(fp) - - def _recreate_encoder(self): - self.mhfp_encoder = rdMHFPFingerprint.MHFPEncoder( - self.fpSize, self._seed - ) - - @property - def seed(self): - return self._seed - - @seed.setter - def seed(self, seed): - self._seed = seed - # each time the seed parameter is modified refresh an instance of the encoder - self._recreate_encoder() - - @property - def n_permutations(self): - warn("n_permutations will be replace by fpSize, due to changes harmonization!", DeprecationWarning) - return self.fpSize - - @n_permutations.setter - def n_permutations(self, n_permutations): - warn("n_permutations will be replace by fpSize, due to changes harmonization!", DeprecationWarning) - self.fpSize = n_permutations - # each time the n_permutations parameter is modified refresh an instance of the encoder - self._recreate_encoder() - - -class SECFingerprintTransformer(FpsTransformer): - # https://jcheminf.biomedcentral.com/articles/10.1186/s13321-018-0321-8 - def __init__( - self, - 
radius: int = 3, - rings: bool = True, - isomeric: bool = False, - kekulize: bool = False, - min_radius: int = 1, - fpSize: int = 2048, - n_permutations: int = 0, - seed: int = 0, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - dtype: np.dtype = np.int8, - ): - """Transforms the RDKit mol into the SMILES extended connectivity fingerprint (SECFP) - - Args: - radius (int, optional): The MHFP radius. Defaults to 3. - rings (bool, optional): Whether or not to include rings in the shingling. Defaults to True. - isomeric (bool, optional): Whether the isomeric SMILES to be considered. Defaults to False. - kekulize (bool, optional): Whether or not to kekulize the extracted SMILES. Defaults to False. - min_radius (int, optional): The minimum radius that is used to extract n-gram. Defaults to 1. - fpSize (int, optional): The length of the folded fingerprint. Defaults to 2048. - n_permutations (int, optional): The number of permutations used for hashing. Defaults to 0. - seed (int, optional): The value used to seed numpy.random. Defaults to 0. 
- """ - super().__init__( - parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype - ) - self.radius = radius - self.rings = rings - self.isomeric = isomeric - self.kekulize = kekulize - self.min_radius = min_radius - self.fpSize = fpSize - # Set the .n_permutations and seed without creating the encoder twice - self._n_permutations = n_permutations - self._seed = seed - # create the encoder instance - self._recreate_encoder() - - def __getstate__(self): - # Get the state of the parent class - state = super().__getstate__() - # Remove the unpicklable property from the state - state.pop("mhfp_encoder", None) # mhfp_encoder is not picklable - return state - - def __setstate__(self, state): - # Restore the state of the parent class - super().__setstate__(state) - # Re-create the unpicklable property - self._recreate_encoder() - - def _mol2fp(self, mol): - return self.mhfp_encoder.EncodeSECFPMol( - mol, - self.radius, - self.rings, - self.isomeric, - self.kekulize, - self.min_radius, - self.length, - ) - - def _recreate_encoder(self): - self.mhfp_encoder = rdMHFPFingerprint.MHFPEncoder( - self._n_permutations, self._seed - ) - - @property - def seed(self): - return self._seed - - @seed.setter - def seed(self, seed): - self._seed = seed - # each time the seed parameter is modified refresh an instace of the encoder - self._recreate_encoder() - - @property - def n_permutations(self): - return self._n_permutations - - @n_permutations.setter - def n_permutations(self, n_permutations): - self._n_permutations = n_permutations - # each time the n_permutations parameter is modified refresh an instace of the encoder - self._recreate_encoder() - - @property - def length(self): - warn("length will be replace by fpSize, due to changes harmonization!", DeprecationWarning) - return self.fpSize - - -class AvalonFingerprintTransformer(FpsTransformer): - # Fingerprint from the Avalon toolkeit, https://doi.org/10.1021/ci050413p - def __init__( - self, - fpSize: int = 512, 
- isQuery: bool = False, - resetVect: bool = False, - bitFlags: int = 15761407, - useCounts: bool = False, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - dtype: np.dtype = np.int8, - ): - """Transform RDKit mols into Count or bit-based Avalon Fingerprints - - Parameters - ---------- - fpSize : int, optional - Size of the fingerprint, by default 512 - isQuery : bool, optional - use the fingerprint for a query structure, by default False - resetVect : bool, optional - reset vector, by default False NB: only used in GetAvalonFP (not for GetAvalonCountFP) - bitFlags : int, optional - Substructure fingerprint (32767) or similarity fingerprint (15761407) by default 15761407 - useCounts : bool, optional - If toggled will create the count and not bit-based fingerprint, by default False - """ - super().__init__( - parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype - ) - self.fpSize = fpSize - self.isQuery = isQuery - self.resetVect = resetVect - self.bitFlags = bitFlags - self.useCounts = useCounts - - def _mol2fp(self, mol): - if self.useCounts: - return pyAvalonTools.GetAvalonCountFP( - mol, - nBits=int(self.fpSize), - isQuery=bool(self.isQuery), - bitFlags=int(self.bitFlags), - ) - else: - return pyAvalonTools.GetAvalonFP( - mol, - nBits=int(self.fpSize), - isQuery=bool(self.isQuery), - resetVect=bool(self.resetVect), - bitFlags=int(self.bitFlags), - ) - - -class MorganFingerprintTransformer(FpsTransformer): - def __init__( - self, - fpSize=2048, - radius=2, - useChirality=False, - useBondTypes=True, - useFeatures=False, - useCounts=False, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - dtype: np.dtype = np.int8, - ): - """Transform RDKit mols into Count or bit-based hashed MorganFingerprints - - Parameters - ---------- - fpSize : int, optional - Size of the hashed fingerprint, by default 2048 - radius : int, optional - Radius of the fingerprint, by default 2 - useChirality : bool, optional - 
Include chirality in calculation of the fingerprint keys, by default False - useBondTypes : bool, optional - Include bondtypes in calculation of the fingerprint keys, by default True - useFeatures : bool, optional - use chemical features, rather than atom-type in calculation of the fingerprint keys, by default False - useCounts : bool, optional - If toggled will create the count and not bit-based fingerprint, by default False - """ - super().__init__( - parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype - ) - self.fpSize = fpSize - self.radius = radius - self.useChirality = useChirality - self.useBondTypes = useBondTypes - self.useFeatures = useFeatures - self.useCounts = useCounts - - warn("MorganFingerprintTransformer will be replace by MorganGeneratorTransformer, due to changes in RDKit!", DeprecationWarning) - - def _mol2fp(self, mol): - if self.useCounts: - return rdMolDescriptors.GetHashedMorganFingerprint( - mol, - int(self.radius), - nBits=int(self.fpSize), - useFeatures=bool(self.useFeatures), - useChirality=bool(self.useChirality), - useBondTypes=bool(self.useBondTypes), - ) - else: - return rdMolDescriptors.GetMorganFingerprintAsBitVect( - mol, - int(self.radius), - nBits=int(self.fpSize), - useFeatures=bool(self.useFeatures), - useChirality=bool(self.useChirality), - useBondTypes=bool(self.useBondTypes), - ) - - -class RDKitFingerprintTransformer(FpsTransformer): - def __init__( - self, - minPath: int = 1, - maxPath: int = 7, - useHs: bool = True, - branchedPaths: bool = True, - useBondOrder: bool = True, - countSimulation: bool = False, - countBounds=None, - fpSize: int = 2048, - numBitsPerFeature: int = 2, - atomInvariantsGenerator=None, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - dtype: np.dtype = np.int8, - ): - """Calculates the RDKit fingerprints - - Parameters - ---------- - minPath : int, optional - the minimum path length (in bonds) to be included, by default 1 - maxPath : int, optional - 
the maximum path length (in bonds) to be included, by default 7 - useHs : bool, optional - toggles inclusion of Hs in paths (if the molecule has explicit Hs), by default True - branchedPaths : bool, optional - toggles generation of branched subgraphs, not just linear paths, by default True - useBondOrder : bool, optional - toggles inclusion of bond orders in the path hashes, by default True - countSimulation : bool, optional - if set, use count simulation while generating the fingerprint, by default False - countBounds : _type_, optional - boundaries for count simulation, corresponding bit will be set if the count is higher than the number provided for that spot, by default None - fpSize : int, optional - size of the generated fingerprint, does not affect the sparse versions, by default 2048 - numBitsPerFeature : int, optional - the number of bits set per path/subgraph found, by default 2 - atomInvariantsGenerator : _type_, optional - atom invariants to be used during fingerprint generation, by default None - """ - super().__init__( - parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype - ) - self.minPath = minPath - self.maxPath = maxPath - self.useHs = useHs - self.branchedPaths = branchedPaths - self.useBondOrder = useBondOrder - self.countSimulation = countSimulation - self.countBounds = countBounds - self.fpSize = fpSize - self.numBitsPerFeature = numBitsPerFeature - self.atomInvariantsGenerator = atomInvariantsGenerator - - warn("RDKitFingerprintTransformer will be replace by RDKitFPGeneratorTransformer, due to changes in RDKit!", DeprecationWarning) - - - def _mol2fp(self, mol): - generator = rdFingerprintGenerator.GetRDKitFPGenerator( - minPath=int(self.minPath), - maxPath=int(self.maxPath), - useHs=bool(self.useHs), - branchedPaths=bool(self.branchedPaths), - useBondOrder=bool(self.useBondOrder), - countSimulation=bool(self.countSimulation), - countBounds=bool(self.countBounds), - fpSize=int(self.fpSize), - 
numBitsPerFeature=int(self.numBitsPerFeature), - atomInvariantsGenerator=self.atomInvariantsGenerator, - ) - return generator.GetFingerprint(mol) - - -class AtomPairFingerprintTransformer(FpsTransformer): - def __init__( - self, - minLength: int = 1, - maxLength: int = 30, - fromAtoms=0, - ignoreAtoms=0, - atomInvariants=0, - nBitsPerEntry: int = 4, - includeChirality: bool = False, - use2D: bool = True, - confId: int = -1, - fpSize=2048, - useCounts: bool = False, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - dtype: np.dtype = np.int8, - ): - super().__init__( - parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype - ) - self.minLength = minLength - self.maxLength = maxLength - self.fromAtoms = fromAtoms - self.ignoreAtoms = ignoreAtoms - self.atomInvariants = atomInvariants - self.includeChirality = includeChirality - self.use2D = use2D - self.confId = confId - self.fpSize = fpSize - self.nBitsPerEntry = nBitsPerEntry - self.useCounts = useCounts - - warn("AtomPairFingerprintTransformer will be replace by AtomPairFPGeneratorTransformer, due to changes in RDKit!", DeprecationWarning) - - def _mol2fp(self, mol): - if self.useCounts: - return rdMolDescriptors.GetHashedAtomPairFingerprint( - mol, - nBits=int(self.fpSize), - minLength=int(self.minLength), - maxLength=int(self.maxLength), - fromAtoms=self.fromAtoms, - ignoreAtoms=self.ignoreAtoms, - atomInvariants=self.atomInvariants, - includeChirality=bool(self.includeChirality), - use2D=bool(self.use2D), - confId=int(self.confId), - ) - else: - return rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect( - mol, - nBits=int(self.fpSize), - minLength=int(self.minLength), - maxLength=int(self.maxLength), - fromAtoms=self.fromAtoms, - ignoreAtoms=self.ignoreAtoms, - atomInvariants=self.atomInvariants, - nBitsPerEntry=int(self.nBitsPerEntry), - includeChirality=bool(self.includeChirality), - use2D=bool(self.use2D), - confId=int(self.confId), - ) - - -class 
TopologicalTorsionFingerprintTransformer(FpsTransformer): - def __init__( - self, - targetSize: int = 4, - fromAtoms=0, - ignoreAtoms=0, - atomInvariants=0, - includeChirality: bool = False, - nBitsPerEntry: int = 4, - fpSize=2048, - useCounts: bool = False, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - dtype: np.dtype = np.int8, - ): - super().__init__( - parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype - ) - self.targetSize = targetSize - self.fromAtoms = fromAtoms - self.ignoreAtoms = ignoreAtoms - self.atomInvariants = atomInvariants - self.includeChirality = includeChirality - self.nBitsPerEntry = nBitsPerEntry - self.fpSize = fpSize - self.useCounts = useCounts - - warn("TopologicalTorsionFingerprintTransformer will be replace by TopologicalTorsionFPGeneatorTransformer, due to changes in RDKit!", DeprecationWarning) - - def _mol2fp(self, mol): - if self.useCounts: - return rdMolDescriptors.GetHashedTopologicalTorsionFingerprint( - mol, - nBits=int(self.fpSize), - targetSize=int(self.targetSize), - fromAtoms=self.fromAtoms, - ignoreAtoms=self.ignoreAtoms, - atomInvariants=self.atomInvariants, - includeChirality=bool(self.includeChirality), - ) - else: - return rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect( - mol, - nBits=int(self.fpSize), - targetSize=int(self.targetSize), - fromAtoms=self.fromAtoms, - ignoreAtoms=self.ignoreAtoms, - atomInvariants=self.atomInvariants, - includeChirality=bool(self.includeChirality), - nBitsPerEntry=int(self.nBitsPerEntry), - ) - - -def parallel_helper(args): - """Parallel_helper takes a tuple with classname, the objects parameters and the mols to process. - Then instantiates the class with the parameters and processes the mol. 
- Intention is to be able to do this in child processes as some classes can't be pickled""" - classname, parameters, X_mols = args - from scikit_mol import fingerprints - - transformer = getattr(fingerprints, classname)(**parameters) - return transformer._transform(X_mols) - - class FpsGeneratorTransformer(FpsTransformer): _regenerate_on_properties = () @@ -755,22 +216,29 @@ def __getstate__(self): state = super().__getstate__() state.update(self.get_params()) # Remove the unpicklable property from the state - state.pop("_fpgen", None) # fpgen is not picklable + state.pop("_fpgen", None) # fpgen is not picklable return state def __setstate__(self, state): # Restore the state of the parent class super().__setstate__(state) # Re-create the unpicklable property - generatort_keys = inspect.signature(self._generate_fp_generator).parameters.keys() - params = [setattr(self, k, state["_"+k]) if "_"+k in state else setattr(self, k, state[k]) for k in generatort_keys] + generatort_keys = inspect.signature( + self._generate_fp_generator + ).parameters.keys() + params = [ + setattr(self, k, state["_" + k]) + if "_" + k in state + else setattr(self, k, state[k]) + for k in generatort_keys + ] self._generate_fp_generator() def __setattr__(self, name: str, value): super().__setattr__(name, value) if ( - not hasattr(self, "_initializing") - and name in self._regenerate_on_properties + not hasattr(self, "_initializing") + and name in self._regenerate_on_properties ): self._generate_fp_generator() @@ -787,190 +255,12 @@ def _transform_mol(self, mol) -> np.array: raise NotImplementedError("_transform_mol not implemented") -class MorganFPGeneratorTransformer(FpsGeneratorTransformer): - _regenerate_on_properties = ("radius", "fpSize", "useChirality", "useFeatures", "useBondTypes") - - def __init__(self, fpSize=2048, radius=2, useChirality=False, - useBondTypes=True, useFeatures=False, useCounts=False, - parallel: Union[bool, int] = False, ): - """Transform RDKit mols into Count or 
bit-based hashed MorganFingerprints - - Parameters - ---------- - fpsize : int, optional - Size of the hashed fingerprint, by default 2048 - radius : int, optional - Radius of the fingerprint, by default 2 - useChirality : bool, optional - Include chirality in calculation of the fingerprint keys, by default False - useBondTypes : bool, optional - Include bondtypes in calculation of the fingerprint keys, by default True - useFeatures : bool, optional - use chemical features, rather than atom-type in calculation of the fingerprint keys, by default False - useCounts : bool, optional - If toggled will create the count and not bit-based fingerprint, by default False - """ - - self._initializing = True - super().__init__(parallel = parallel) - self.fpSize = fpSize - self.radius = radius - self.useChirality = useChirality - self.useFeatures = useFeatures - self.useCounts = useCounts - self.useBondTypes = useBondTypes - - self._generate_fp_generator() - delattr(self, "_initializing") - - - def _generate_fp_generator(self): - - if self.useFeatures: - atomInvariantsGenerator = GetMorganFeatureAtomInvGen() - else: - atomInvariantsGenerator = None - - self._fpgen = GetMorganGenerator(radius=self.radius, - fpSize=self.fpSize, - includeChirality=self.useChirality, - useBondTypes=self.useBondTypes, - atomInvariantsGenerator=atomInvariantsGenerator, - ) - - def _transform_mol(self, mol) -> np.array: - if self.useCounts: - return self._fpgen.GetCountFingerprintAsNumPy(mol) - else: - return self._fpgen.GetFingerprintAsNumPy(mol) - - -class TopologicalTorsionFPGeneatorTransformer(FpsGeneratorTransformer): - _regenerate_on_properties = ("fpSize", "includeChirality", "targetSize") - - def __init__(self, targetSize:int = 4, fromAtoms = None, ignoreAtoms = None, atomInvariants = None, confId=-1, - includeChirality:bool = False, fpSize:int=2048, - useCounts:bool=False, parallel: Union[bool, int] = False): - - self._initializing = True - super().__init__(parallel=parallel) - self.fpSize = 
fpSize - self.includeChirality = includeChirality - self.targetSize = targetSize - - self.fromAtoms = fromAtoms - self.ignoreAtoms = ignoreAtoms - self.atomInvariants = atomInvariants - self.confId = confId - self.useCounts = useCounts - - self._generate_fp_generator() - delattr(self, "_initializing") - - - def _generate_fp_generator(self): - self._fpgen = GetTopologicalTorsionGenerator(torsionAtomCount=self.targetSize, includeChirality=self.includeChirality, - fpSize=self.fpSize) - - def _transform_mol(self, mol) -> np.array: - if self.useCounts: - return self._fpgen.GetCountFingerprintAsNumPy(mol, fromAtoms=self.fromAtoms, ignoreAtoms=self.ignoreAtoms, customAtomInvariants=self.atomInvariants) - else: - return self._fpgen.GetFingerprintAsNumPy(mol, fromAtoms=self.fromAtoms, ignoreAtoms=self.ignoreAtoms, customAtomInvariants=self.atomInvariants) - - -class AtomPairFPGeneratorTransformer(FpsGeneratorTransformer): - _regenerate_on_properties = ("fpSize", "includeChirality", "use2D", "minLength", "maxLength") - - def __init__(self, minLength:int = 1, maxLength:int = 30, fromAtoms = None, ignoreAtoms = None, atomInvariants = None, - includeChirality:bool = False, use2D:bool = True, confId:int = -1, fpSize:int=2048, - useCounts:bool=False, parallel: Union[bool, int] = False,): - self._initializing = True - super().__init__(parallel = parallel) - self.fpSize = fpSize - self.use2D = use2D - self.includeChirality = includeChirality - self.minLength = minLength - self.maxLength = maxLength - - self.useCounts= useCounts - self.confId = confId - self.fromAtoms = fromAtoms - self.ignoreAtoms = ignoreAtoms - self.atomInvariants = atomInvariants - - self._generate_fp_generator() - delattr(self, "_initializing") - - def _generate_fp_generator(self): - self._fpgen = GetAtomPairGenerator(minDistance=self.minLength, maxDistance=self.maxLength, - includeChirality=self.includeChirality, - use2D=self.use2D, fpSize=self.fpSize) - - def _transform_mol(self, mol) -> np.array: - if 
self.useCounts: - return self._fpgen.GetCountFingerprintAsNumPy(mol, fromAtoms=self.fromAtoms, ignoreAtoms=self.ignoreAtoms, customAtomInvariants=self.atomInvariants) - else: - return self._fpgen.GetFingerprintAsNumPy(mol, fromAtoms=self.fromAtoms, ignoreAtoms=self.ignoreAtoms, customAtomInvariants=self.atomInvariants) - - -class RDKitFPGeneratorTransformer(FpsGeneratorTransformer): - _regenerate_on_properties = ("minPath", "maxPath", "useHs", "branchedPaths", "useBondOrder", "countSimulation", "fpSize", "countBounds", - "numBitsPerFeature") - - def __init__(self, minPath:int = 1, maxPath:int =7, useHs:bool = True, branchedPaths:bool = True, - useBondOrder:bool = True, countSimulation:bool = False, countBounds = None, - fpSize:int = 2048, numBitsPerFeature:int = 2, - useCounts:bool = False, parallel: Union[bool, int] = False - ): - """Calculates the RDKit fingerprints - - Parameters - ---------- - minPath : int, optional - the minimum path length (in bonds) to be included, by default 1 - maxPath : int, optional - the maximum path length (in bonds) to be included, by default 7 - useHs : bool, optional - toggles inclusion of Hs in paths (if the molecule has explicit Hs), by default True - branchedPaths : bool, optional - toggles generation of branched subgraphs, not just linear paths, by default True - useBondOrder : bool, optional - toggles inclusion of bond orders in the path hashes, by default True - countSimulation : bool, optional - if set, use count simulation while generating the fingerprint, by default False - countBounds : _type_, optional - boundaries for count simulation, corresponding bit will be set if the count is higher than the number provided for that spot, by default None - fpSize : int, optional - size of the generated fingerprint, does not affect the sparse versions, by default 2048 - numBitsPerFeature : int, optional - the number of bits set per path/subgraph found, by default 2 - """ - self._initializing = True - super().__init__(parallel = 
parallel) - self.minPath = minPath - self.maxPath = maxPath - self.useHs = useHs - self.branchedPaths = branchedPaths - self.useBondOrder = useBondOrder - self.countSimulation = countSimulation - self.fpSize = fpSize - self.numBitsPerFeature = numBitsPerFeature - self.countBounds = countBounds - - self.useCounts = useCounts - - self._generate_fp_generator() - delattr(self, "_initializing") - - def _transform_mol(self, mol) -> np.array: - if self.useCounts: - return self._fpgen.GetCountFingerprintAsNumPy(mol) - else: - return self._fpgen.GetFingerprintAsNumPy(mol) +def parallel_helper(args): + """Parallel_helper takes a tuple with classname, the objects parameters and the mols to process. + Then instantiates the class with the parameters and processes the mol. + Intention is to be able to do this in child processes as some classes can't be pickled""" + classname, parameters, X_mols = args + from scikit_mol import fingerprints - def _generate_fp_generator(self): - self._fpgen = GetRDKitFPGenerator(minPath=self.minPath, maxPath=self.maxPath, useHs=self.useHs, - branchedPaths=self.branchedPaths,useBondOrder=self.useBondOrder, - countSimulation=self.countSimulation, fpSize=self.fpSize, - countBounds=self.countBounds, numBitsPerFeature=self.numBitsPerFeature) + transformer = getattr(fingerprints, classname)(**parameters) + return transformer._transform(X_mols) diff --git a/scikit_mol/fingerprints/maccs.py b/scikit_mol/fingerprints/maccs.py new file mode 100644 index 0000000..ca38966 --- /dev/null +++ b/scikit_mol/fingerprints/maccs.py @@ -0,0 +1,41 @@ +from typing import Union +from rdkit.Chem import rdMolDescriptors +import numpy as np + +from .baseclasses import FpsTransformer + + +class MACCSKeysFingerprintTransformer(FpsTransformer): + def __init__( + self, + parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, + dtype: np.dtype = np.int8, + fpSize=167, + ): + """MACCS keys fingerprinter + calculates the 167 fixed MACCS keys + """ + 
super().__init__( + parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype + ) + if fpSize != 167: + raise ValueError( + "fpSize can only be 167, matching the number of defined MACCS keys!" + ) + self._fpSize = fpSize + + @property + def fpSize(self): + return self._fpSize + + @fpSize.setter + def fpSize(self, fpSize): + if fpSize != 167: + raise ValueError( + "fpSize can only be 167, matching the number of defined MACCS keys!" + ) + self._fpSize = fpSize + + def _mol2fp(self, mol): + return rdMolDescriptors.GetMACCSKeysFingerprint(mol) diff --git a/scikit_mol/fingerprints/minhash.py b/scikit_mol/fingerprints/minhash.py new file mode 100644 index 0000000..1c7e62a --- /dev/null +++ b/scikit_mol/fingerprints/minhash.py @@ -0,0 +1,206 @@ +from typing import Union + +import numpy as np + +from warnings import warn + +from .baseclasses import FpsTransformer + +from rdkit.Chem import rdMHFPFingerprint + + +class MHFingerprintTransformer(FpsTransformer): + def __init__( + self, + radius: int = 3, + rings: bool = True, + isomeric: bool = False, + kekulize: bool = False, + min_radius: int = 1, + fpSize: int = 2048, + seed: int = 42, + parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, + dtype: np.dtype = np.int32, + ): + """Transforms the RDKit mol into the MinHash fingerprint (MHFP) + + https://jcheminf.biomedcentral.com/articles/10.1186/s13321-018-0321-8 + + Args: + radius (int, optional): The MHFP radius. Defaults to 3. + rings (bool, optional): Whether or not to include rings in the shingling. Defaults to True. + isomeric (bool, optional): Whether the isomeric SMILES to be considered. Defaults to False. + kekulize (bool, optional): Whether or not to kekulize the extracted SMILES. Defaults to False. + min_radius (int, optional): The minimum radius that is used to extract n-gram. Defaults to 1. + fpSize (int, optional): The number of permutations used for hashing. 
Defaults to 2048, + this is effectively the length of the FP + seed (int, optional): The value used to seed numpy.random. Defaults to 0. + """ + super().__init__( + parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype + ) + self.radius = radius + self.rings = rings + self.isomeric = isomeric + self.kekulize = kekulize + self.min_radius = min_radius + # Set the .n_permutations and .seed without creating the encoder twice + self.fpSize = fpSize + self._seed = seed + # create the encoder instance + self._recreate_encoder() + + def __getstate__(self): + # Get the state of the parent class + state = super().__getstate__() + # Remove the unpicklable property from the state + state.pop("mhfp_encoder", None) # mhfp_encoder is not picklable + return state + + def __setstate__(self, state): + # Restore the state of the parent class + super().__setstate__(state) + # Re-create the unpicklable property + self._recreate_encoder() + + def _mol2fp(self, mol): + fp = self.mhfp_encoder.EncodeMol( + mol, self.radius, self.rings, self.isomeric, self.kekulize, self.min_radius + ) + return fp + + def _fp2array(self, fp): + return np.array(fp) + + def _recreate_encoder(self): + self.mhfp_encoder = rdMHFPFingerprint.MHFPEncoder(self.fpSize, self._seed) + + @property + def seed(self): + return self._seed + + @seed.setter + def seed(self, seed): + self._seed = seed + # each time the seed parameter is modified refresh an instance of the encoder + self._recreate_encoder() + + @property + def n_permutations(self): + warn( + "n_permutations will be replace by fpSize, due to changes harmonization!", + DeprecationWarning, + ) + return self.fpSize + + @n_permutations.setter + def n_permutations(self, n_permutations): + warn( + "n_permutations will be replace by fpSize, due to changes harmonization!", + DeprecationWarning, + ) + self.fpSize = n_permutations + # each time the n_permutations parameter is modified refresh an instance of the encoder + self._recreate_encoder() + + 
+class SECFingerprintTransformer(FpsTransformer): + # https://jcheminf.biomedcentral.com/articles/10.1186/s13321-018-0321-8 + def __init__( + self, + radius: int = 3, + rings: bool = True, + isomeric: bool = False, + kekulize: bool = False, + min_radius: int = 1, + fpSize: int = 2048, + n_permutations: int = 0, + seed: int = 0, + parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, + dtype: np.dtype = np.int8, + ): + """Transforms the RDKit mol into the SMILES extended connectivity fingerprint (SECFP) + + Args: + radius (int, optional): The MHFP radius. Defaults to 3. + rings (bool, optional): Whether or not to include rings in the shingling. Defaults to True. + isomeric (bool, optional): Whether the isomeric SMILES to be considered. Defaults to False. + kekulize (bool, optional): Whether or not to kekulize the extracted SMILES. Defaults to False. + min_radius (int, optional): The minimum radius that is used to extract n-gram. Defaults to 1. + fpSize (int, optional): The length of the folded fingerprint. Defaults to 2048. + n_permutations (int, optional): The number of permutations used for hashing. Defaults to 0. + seed (int, optional): The value used to seed numpy.random. Defaults to 0. 
+ """ + super().__init__( + parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype + ) + self.radius = radius + self.rings = rings + self.isomeric = isomeric + self.kekulize = kekulize + self.min_radius = min_radius + self.fpSize = fpSize + # Set the .n_permutations and seed without creating the encoder twice + self._n_permutations = n_permutations + self._seed = seed + # create the encoder instance + self._recreate_encoder() + + def __getstate__(self): + # Get the state of the parent class + state = super().__getstate__() + # Remove the unpicklable property from the state + state.pop("mhfp_encoder", None) # mhfp_encoder is not picklable + return state + + def __setstate__(self, state): + # Restore the state of the parent class + super().__setstate__(state) + # Re-create the unpicklable property + self._recreate_encoder() + + def _mol2fp(self, mol): + return self.mhfp_encoder.EncodeSECFPMol( + mol, + self.radius, + self.rings, + self.isomeric, + self.kekulize, + self.min_radius, + self.length, + ) + + def _recreate_encoder(self): + self.mhfp_encoder = rdMHFPFingerprint.MHFPEncoder( + self._n_permutations, self._seed + ) + + @property + def seed(self): + return self._seed + + @seed.setter + def seed(self, seed): + self._seed = seed + # each time the seed parameter is modified refresh an instace of the encoder + self._recreate_encoder() + + @property + def n_permutations(self): + return self._n_permutations + + @n_permutations.setter + def n_permutations(self, n_permutations): + self._n_permutations = n_permutations + # each time the n_permutations parameter is modified refresh an instace of the encoder + self._recreate_encoder() + + @property + def length(self): + warn( + "length will be replace by fpSize, due to changes harmonization!", + DeprecationWarning, + ) + return self.fpSize diff --git a/scikit_mol/fingerprints/morgan.py b/scikit_mol/fingerprints/morgan.py new file mode 100644 index 0000000..37d7cf8 --- /dev/null +++ 
b/scikit_mol/fingerprints/morgan.py @@ -0,0 +1,150 @@ +from typing import Union + +from rdkit.Chem import rdMolDescriptors + +import numpy as np + +from warnings import warn + +from rdkit.Chem.rdFingerprintGenerator import ( + GetMorganGenerator, + GetMorganFeatureAtomInvGen, +) + +from .baseclasses import FpsTransformer, FpsGeneratorTransformer + + +class MorganFingerprintTransformer(FpsTransformer): + def __init__( + self, + fpSize=2048, + radius=2, + useChirality=False, + useBondTypes=True, + useFeatures=False, + useCounts=False, + parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, + dtype: np.dtype = np.int8, + ): + """Transform RDKit mols into Count or bit-based hashed MorganFingerprints + + Parameters + ---------- + fpSize : int, optional + Size of the hashed fingerprint, by default 2048 + radius : int, optional + Radius of the fingerprint, by default 2 + useChirality : bool, optional + Include chirality in calculation of the fingerprint keys, by default False + useBondTypes : bool, optional + Include bondtypes in calculation of the fingerprint keys, by default True + useFeatures : bool, optional + use chemical features, rather than atom-type in calculation of the fingerprint keys, by default False + useCounts : bool, optional + If toggled will create the count and not bit-based fingerprint, by default False + """ + super().__init__( + parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype + ) + self.fpSize = fpSize + self.radius = radius + self.useChirality = useChirality + self.useBondTypes = useBondTypes + self.useFeatures = useFeatures + self.useCounts = useCounts + + warn( + "MorganFingerprintTransformer will be replace by MorganGeneratorTransformer, due to changes in RDKit!", + DeprecationWarning, + ) + + def _mol2fp(self, mol): + if self.useCounts: + return rdMolDescriptors.GetHashedMorganFingerprint( + mol, + int(self.radius), + nBits=int(self.fpSize), + useFeatures=bool(self.useFeatures), + 
useChirality=bool(self.useChirality), + useBondTypes=bool(self.useBondTypes), + ) + else: + return rdMolDescriptors.GetMorganFingerprintAsBitVect( + mol, + int(self.radius), + nBits=int(self.fpSize), + useFeatures=bool(self.useFeatures), + useChirality=bool(self.useChirality), + useBondTypes=bool(self.useBondTypes), + ) + + +class MorganFPGeneratorTransformer(FpsGeneratorTransformer): + _regenerate_on_properties = ( + "radius", + "fpSize", + "useChirality", + "useFeatures", + "useBondTypes", + ) + + def __init__( + self, + fpSize=2048, + radius=2, + useChirality=False, + useBondTypes=True, + useFeatures=False, + useCounts=False, + parallel: Union[bool, int] = False, + ): + """Transform RDKit mols into Count or bit-based hashed MorganFingerprints + + Parameters + ---------- + fpsize : int, optional + Size of the hashed fingerprint, by default 2048 + radius : int, optional + Radius of the fingerprint, by default 2 + useChirality : bool, optional + Include chirality in calculation of the fingerprint keys, by default False + useBondTypes : bool, optional + Include bondtypes in calculation of the fingerprint keys, by default True + useFeatures : bool, optional + use chemical features, rather than atom-type in calculation of the fingerprint keys, by default False + useCounts : bool, optional + If toggled will create the count and not bit-based fingerprint, by default False + """ + + self._initializing = True + super().__init__(parallel=parallel) + self.fpSize = fpSize + self.radius = radius + self.useChirality = useChirality + self.useFeatures = useFeatures + self.useCounts = useCounts + self.useBondTypes = useBondTypes + + self._generate_fp_generator() + delattr(self, "_initializing") + + def _generate_fp_generator(self): + if self.useFeatures: + atomInvariantsGenerator = GetMorganFeatureAtomInvGen() + else: + atomInvariantsGenerator = None + + self._fpgen = GetMorganGenerator( + radius=self.radius, + fpSize=self.fpSize, + includeChirality=self.useChirality, + 
useBondTypes=self.useBondTypes, + atomInvariantsGenerator=atomInvariantsGenerator, + ) + + def _transform_mol(self, mol) -> np.array: + if self.useCounts: + return self._fpgen.GetCountFingerprintAsNumPy(mol) + else: + return self._fpgen.GetFingerprintAsNumPy(mol) diff --git a/scikit_mol/fingerprints/rdkitfp.py b/scikit_mol/fingerprints/rdkitfp.py new file mode 100644 index 0000000..28ce0a8 --- /dev/null +++ b/scikit_mol/fingerprints/rdkitfp.py @@ -0,0 +1,175 @@ +from typing import Union + +import numpy as np + +from warnings import warn + +from .baseclasses import FpsTransformer, FpsGeneratorTransformer + +from rdkit.Chem.rdFingerprintGenerator import GetRDKitFPGenerator + +from rdkit.Chem import rdFingerprintGenerator + + +class RDKitFingerprintTransformer(FpsTransformer): + def __init__( + self, + minPath: int = 1, + maxPath: int = 7, + useHs: bool = True, + branchedPaths: bool = True, + useBondOrder: bool = True, + countSimulation: bool = False, + countBounds=None, + fpSize: int = 2048, + numBitsPerFeature: int = 2, + atomInvariantsGenerator=None, + parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, + dtype: np.dtype = np.int8, + ): + """Calculates the RDKit fingerprints + + Parameters + ---------- + minPath : int, optional + the minimum path length (in bonds) to be included, by default 1 + maxPath : int, optional + the maximum path length (in bonds) to be included, by default 7 + useHs : bool, optional + toggles inclusion of Hs in paths (if the molecule has explicit Hs), by default True + branchedPaths : bool, optional + toggles generation of branched subgraphs, not just linear paths, by default True + useBondOrder : bool, optional + toggles inclusion of bond orders in the path hashes, by default True + countSimulation : bool, optional + if set, use count simulation while generating the fingerprint, by default False + countBounds : _type_, optional + boundaries for count simulation, corresponding bit will be set if the count is higher than 
the number provided for that spot, by default None + fpSize : int, optional + size of the generated fingerprint, does not affect the sparse versions, by default 2048 + numBitsPerFeature : int, optional + the number of bits set per path/subgraph found, by default 2 + atomInvariantsGenerator : _type_, optional + atom invariants to be used during fingerprint generation, by default None + """ + super().__init__( + parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype + ) + self.minPath = minPath + self.maxPath = maxPath + self.useHs = useHs + self.branchedPaths = branchedPaths + self.useBondOrder = useBondOrder + self.countSimulation = countSimulation + self.countBounds = countBounds + self.fpSize = fpSize + self.numBitsPerFeature = numBitsPerFeature + self.atomInvariantsGenerator = atomInvariantsGenerator + + warn( + "RDKitFingerprintTransformer will be replace by RDKitFPGeneratorTransformer, due to changes in RDKit!", + DeprecationWarning, + ) + + def _mol2fp(self, mol): + generator = rdFingerprintGenerator.GetRDKitFPGenerator( + minPath=int(self.minPath), + maxPath=int(self.maxPath), + useHs=bool(self.useHs), + branchedPaths=bool(self.branchedPaths), + useBondOrder=bool(self.useBondOrder), + countSimulation=bool(self.countSimulation), + countBounds=bool(self.countBounds), + fpSize=int(self.fpSize), + numBitsPerFeature=int(self.numBitsPerFeature), + atomInvariantsGenerator=self.atomInvariantsGenerator, + ) + return generator.GetFingerprint(mol) + + +class RDKitFPGeneratorTransformer(FpsGeneratorTransformer): + _regenerate_on_properties = ( + "minPath", + "maxPath", + "useHs", + "branchedPaths", + "useBondOrder", + "countSimulation", + "fpSize", + "countBounds", + "numBitsPerFeature", + ) + + def __init__( + self, + minPath: int = 1, + maxPath: int = 7, + useHs: bool = True, + branchedPaths: bool = True, + useBondOrder: bool = True, + countSimulation: bool = False, + countBounds=None, + fpSize: int = 2048, + numBitsPerFeature: int = 2, + useCounts: 
bool = False, + parallel: Union[bool, int] = False, + ): + """Calculates the RDKit fingerprints + + Parameters + ---------- + minPath : int, optional + the minimum path length (in bonds) to be included, by default 1 + maxPath : int, optional + the maximum path length (in bonds) to be included, by default 7 + useHs : bool, optional + toggles inclusion of Hs in paths (if the molecule has explicit Hs), by default True + branchedPaths : bool, optional + toggles generation of branched subgraphs, not just linear paths, by default True + useBondOrder : bool, optional + toggles inclusion of bond orders in the path hashes, by default True + countSimulation : bool, optional + if set, use count simulation while generating the fingerprint, by default False + countBounds : _type_, optional + boundaries for count simulation, corresponding bit will be set if the count is higher than the number provided for that spot, by default None + fpSize : int, optional + size of the generated fingerprint, does not affect the sparse versions, by default 2048 + numBitsPerFeature : int, optional + the number of bits set per path/subgraph found, by default 2 + """ + self._initializing = True + super().__init__(parallel=parallel) + self.minPath = minPath + self.maxPath = maxPath + self.useHs = useHs + self.branchedPaths = branchedPaths + self.useBondOrder = useBondOrder + self.countSimulation = countSimulation + self.fpSize = fpSize + self.numBitsPerFeature = numBitsPerFeature + self.countBounds = countBounds + + self.useCounts = useCounts + + self._generate_fp_generator() + delattr(self, "_initializing") + + def _transform_mol(self, mol) -> np.array: + if self.useCounts: + return self._fpgen.GetCountFingerprintAsNumPy(mol) + else: + return self._fpgen.GetFingerprintAsNumPy(mol) + + def _generate_fp_generator(self): + self._fpgen = GetRDKitFPGenerator( + minPath=self.minPath, + maxPath=self.maxPath, + useHs=self.useHs, + branchedPaths=self.branchedPaths, + useBondOrder=self.useBondOrder, + 
countSimulation=self.countSimulation, + fpSize=self.fpSize, + countBounds=self.countBounds, + numBitsPerFeature=self.numBitsPerFeature, + ) diff --git a/scikit_mol/fingerprints/topologicaltorsion.py b/scikit_mol/fingerprints/topologicaltorsion.py new file mode 100644 index 0000000..0b6640d --- /dev/null +++ b/scikit_mol/fingerprints/topologicaltorsion.py @@ -0,0 +1,120 @@ +from typing import Union + +import numpy as np + +from warnings import warn + +from .baseclasses import FpsTransformer, FpsGeneratorTransformer + +from rdkit.Chem import rdMolDescriptors +from rdkit.Chem.rdFingerprintGenerator import GetTopologicalTorsionGenerator + + +class TopologicalTorsionFingerprintTransformer(FpsTransformer): + def __init__( + self, + targetSize: int = 4, + fromAtoms=0, + ignoreAtoms=0, + atomInvariants=0, + includeChirality: bool = False, + nBitsPerEntry: int = 4, + fpSize=2048, + useCounts: bool = False, + parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, + dtype: np.dtype = np.int8, + ): + super().__init__( + parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype + ) + self.targetSize = targetSize + self.fromAtoms = fromAtoms + self.ignoreAtoms = ignoreAtoms + self.atomInvariants = atomInvariants + self.includeChirality = includeChirality + self.nBitsPerEntry = nBitsPerEntry + self.fpSize = fpSize + self.useCounts = useCounts + + warn( + "TopologicalTorsionFingerprintTransformer will be replace by TopologicalTorsionFPGeneatorTransformer, due to changes in RDKit!", + DeprecationWarning, + ) + + def _mol2fp(self, mol): + if self.useCounts: + return rdMolDescriptors.GetHashedTopologicalTorsionFingerprint( + mol, + nBits=int(self.fpSize), + targetSize=int(self.targetSize), + fromAtoms=self.fromAtoms, + ignoreAtoms=self.ignoreAtoms, + atomInvariants=self.atomInvariants, + includeChirality=bool(self.includeChirality), + ) + else: + return rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect( + mol, + nBits=int(self.fpSize), + 
targetSize=int(self.targetSize), + fromAtoms=self.fromAtoms, + ignoreAtoms=self.ignoreAtoms, + atomInvariants=self.atomInvariants, + includeChirality=bool(self.includeChirality), + nBitsPerEntry=int(self.nBitsPerEntry), + ) + + +class TopologicalTorsionFPGeneatorTransformer(FpsGeneratorTransformer): + _regenerate_on_properties = ("fpSize", "includeChirality", "targetSize") + + def __init__( + self, + targetSize: int = 4, + fromAtoms=None, + ignoreAtoms=None, + atomInvariants=None, + confId=-1, + includeChirality: bool = False, + fpSize: int = 2048, + useCounts: bool = False, + parallel: Union[bool, int] = False, + ): + self._initializing = True + super().__init__(parallel=parallel) + self.fpSize = fpSize + self.includeChirality = includeChirality + self.targetSize = targetSize + + self.fromAtoms = fromAtoms + self.ignoreAtoms = ignoreAtoms + self.atomInvariants = atomInvariants + self.confId = confId + self.useCounts = useCounts + + self._generate_fp_generator() + delattr(self, "_initializing") + + def _generate_fp_generator(self): + self._fpgen = GetTopologicalTorsionGenerator( + torsionAtomCount=self.targetSize, + includeChirality=self.includeChirality, + fpSize=self.fpSize, + ) + + def _transform_mol(self, mol) -> np.array: + if self.useCounts: + return self._fpgen.GetCountFingerprintAsNumPy( + mol, + fromAtoms=self.fromAtoms, + ignoreAtoms=self.ignoreAtoms, + customAtomInvariants=self.atomInvariants, + ) + else: + return self._fpgen.GetFingerprintAsNumPy( + mol, + fromAtoms=self.fromAtoms, + ignoreAtoms=self.ignoreAtoms, + customAtomInvariants=self.atomInvariants, + ) From f092febfbdcab45f32088217add98b48ff8e215c Mon Sep 17 00:00:00 2001 From: Esben Jannik Bjerrum Date: Fri, 22 Nov 2024 16:15:14 +0100 Subject: [PATCH 12/24] Refactored the baseclasses for more logical inheritance for the two abstract classes --- scikit_mol/fingerprints/baseclasses.py | 89 +++++++---- tests/test_fptransformersgenerator.py | 202 +++++++++++++++---------- 2 files changed, 179 
insertions(+), 112 deletions(-) diff --git a/scikit_mol/fingerprints/baseclasses.py b/scikit_mol/fingerprints/baseclasses.py index ce28e18..03ca11b 100644 --- a/scikit_mol/fingerprints/baseclasses.py +++ b/scikit_mol/fingerprints/baseclasses.py @@ -36,18 +36,16 @@ ) -class FpsTransformer(ABC, BaseEstimator, TransformerMixin): +class BaseFpsTransformer(ABC, BaseEstimator, TransformerMixin): def __init__( self, parallel: Union[bool, int] = False, start_method: str = None, safe_inference_mode: bool = False, - dtype: np.dtype = np.int8, ): self.parallel = parallel self.start_method = start_method self.safe_inference_mode = safe_inference_mode - self.dtype = dtype @property def nBits(self): @@ -98,34 +96,25 @@ def get_feature_names_out(self, input_features=None): prefix = self._get_column_prefix() return np.array([f"{prefix}_{i}" for i in range(1, self.fpSize + 1)]) - @abstractmethod - def _mol2fp(self, mol): - """Generate fingerprint from mol - - MUST BE OVERWRITTEN - """ - raise NotImplementedError("_mol2fp not implemented") - - def _fp2array(self, fp): - if fp: - arr = np.zeros((self.fpSize,), dtype=self.dtype) - DataStructs.ConvertToNumpyArray(fp, arr) - return arr - else: - return np.ma.masked_all((self.fpSize,), dtype=self.dtype) - - def _transform_mol(self, mol): + def _safe_transform_mol(self, mol): + """Handle safe inference mode with masked arrays""" if not mol and self.safe_inference_mode: - return self._fp2array(False) + return np.ma.masked_all(self.fpSize) + try: - fp = self._mol2fp(mol) - return self._fp2array(fp) + result = self._transform_mol(mol) + return result except Exception as e: if self.safe_inference_mode: - return self._fp2array(False) + return np.ma.masked_all(self.fpSize) else: raise e + @abstractmethod + def _transform_mol(self, mol): + """Transform a single molecule to numpy array""" + raise NotImplementedError + def fit(self, X, y=None): """Included for scikit-learn compatibility @@ -137,15 +126,20 @@ def fit(self, X, y=None): def 
_transform(self, X): if self.safe_inference_mode: # Use the new method with masked arrays if we're in safe inference mode - arrays = [self._transform_mol(mol) for mol in X] + arrays = [self._safe_transform_mol(mol) for mol in X] print(arrays) return np.ma.stack(arrays) - else: + elif hasattr( + self, "dtype" + ): # TODO, it seems a bit of a code smell that we have to preemptively test a property from the baseclass? # Use the original, faster method if we're not in safe inference mode arr = np.zeros((len(X), self.fpSize), dtype=self.dtype) for i, mol in enumerate(X): arr[i, :] = self._transform_mol(mol) return arr + else: # We are unsure on the dtype, so we don't use a preassigned array #TODO test time differnece to previous + arrays = [self._transform_mol(mol) for mol in X] + return np.stack(arrays) def _transform_sparse(self, X): arr = np.zeros((len(X), self.fpSize), dtype=self.dtype) @@ -202,20 +196,49 @@ def transform(self, X, y=None): return arr -class FpsGeneratorTransformer(FpsTransformer): - _regenerate_on_properties = () +class FpsTransformer(BaseFpsTransformer): + """Classic fingerprint transformer using mol2fp pattern""" - def _fp2array(self, fp): - raise DeprecationWarning("Generators can directly return fingerprints") + def __init__( + self, + parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, + dtype: np.dtype = np.int8, + ): + super().__init__(parallel=parallel, safe_inference_mode=safe_inference_mode) + self.dtype = dtype + def _transform_mol(self, mol): + """Implements the mol -> rdkit fingerprint data structure -> numpy array pattern""" + fp = self._mol2fp(mol) + return self._fp2array(fp) + + @abstractmethod def _mol2fp(self, mol): - raise DeprecationWarning("use _mol2array") + """Generate fingerprint from mol + + MUST BE OVERWRITTEN + """ + raise NotImplementedError("_mol2fp not implemented") + + def _fp2array(self, fp): + """Convert RDKit fingerprint data structure to numpy array""" + if fp: + arr = np.zeros((self.fpSize,), 
dtype=self.dtype) + DataStructs.ConvertToNumpyArray(fp, arr) + return arr + else: + return np.ma.masked_all((self.fpSize,), dtype=self.dtype) + + +class FpsGeneratorTransformer(BaseFpsTransformer): + _regenerate_on_properties = () def __getstate__(self): # Get the state of the parent class state = super().__getstate__() state.update(self.get_params()) - # Remove the unpicklable property from the state + # Remove the potentiallyunpicklable property from the state state.pop("_fpgen", None) # fpgen is not picklable return state @@ -234,6 +257,8 @@ def __setstate__(self, state): ] self._generate_fp_generator() + # TODO: overload set_params in order to not make multiple calls to _generate_fp_generator + def __setattr__(self, name: str, value): super().__setattr__(name, value) if ( diff --git a/tests/test_fptransformersgenerator.py b/tests/test_fptransformersgenerator.py index 81da19c..c61f6e9 100644 --- a/tests/test_fptransformersgenerator.py +++ b/tests/test_fptransformersgenerator.py @@ -2,25 +2,38 @@ import tempfile import pytest import numpy as np -from fixtures import mols_list, smiles_list, mols_container, smiles_container, fingerprint, chiral_smiles_list, chiral_mols_list +from fixtures import ( + mols_list, + smiles_list, + mols_container, + smiles_container, + fingerprint, + chiral_smiles_list, + chiral_mols_list, +) from sklearn import clone -from scikit_mol.fingerprints import (MorganFPGeneratorTransformer, - RDKitFPGeneratorTransformer, - AtomPairFPGeneratorTransformer, - TopologicalTorsionFPGeneatorTransformer, - ) +from scikit_mol.fingerprints import ( + MorganFPGeneratorTransformer, + RDKitFPGeneratorTransformer, + AtomPairFPGeneratorTransformer, + TopologicalTorsionFPGeneatorTransformer, +) -test_transformers = [MorganFPGeneratorTransformer, RDKitFPGeneratorTransformer, - AtomPairFPGeneratorTransformer, TopologicalTorsionFPGeneatorTransformer] +test_transformers = [ + MorganFPGeneratorTransformer, + RDKitFPGeneratorTransformer, + 
AtomPairFPGeneratorTransformer, + TopologicalTorsionFPGeneatorTransformer, +] -@pytest.mark.parametrize("transformer_class", test_transformers) -def test_fpstransformer_fp2array(transformer_class, fingerprint): - transformer = transformer_class() +# @pytest.mark.parametrize("transformer_class", test_transformers) +# def test_fpstransformer_fp2array(transformer_class, fingerprint): +# transformer = transformer_class() - with pytest.raises(DeprecationWarning, match='Generators can directly return fingerprints'): - fp = transformer._fp2array(fingerprint) +# with pytest.raises(DeprecationWarning, match='Generators can directly return fingerprints'): +# fp = transformer._fp2array(fingerprint) @pytest.mark.parametrize("transformer_class", test_transformers) @@ -28,75 +41,79 @@ def test_fpstransformer_transform_mol(transformer_class, mols_list): transformer = transformer_class() fp = transformer._transform_mol(mols_list[0]) - #See that fp is the correct type, shape and bit count - assert(type(fp) == type(np.array([0]))) - assert(fp.shape == (2048,)) + # See that fp is the correct type, shape and bit count + assert type(fp) == type(np.array([0])) + assert fp.shape == (2048,) if isinstance(transformer, RDKitFPGeneratorTransformer): - assert(fp.sum() == 104) + assert fp.sum() == 104 elif isinstance(transformer, AtomPairFPGeneratorTransformer): - assert (fp.sum() == 32) + assert fp.sum() == 32 elif isinstance(transformer, TopologicalTorsionFPGeneatorTransformer): - assert (fp.sum() == 12) + assert fp.sum() == 12 elif isinstance(transformer, MorganFPGeneratorTransformer): - assert (fp.sum() == 14) + assert fp.sum() == 14 else: raise NotImplementedError("missing Assert") + @pytest.mark.parametrize("transformer_class", test_transformers) def test_clonability(transformer_class): transformer = transformer_class() - params = transformer.get_params() + params = transformer.get_params() t2 = clone(transformer) params_2 = t2.get_params() - #Parameters of cloned transformers should be 
the same - assert all([ params[key] == params_2[key] for key in params.keys()]) - #Cloned transformers should not be the same object + # Parameters of cloned transformers should be the same + assert all([params[key] == params_2[key] for key in params.keys()]) + # Cloned transformers should not be the same object assert t2 != transformer + @pytest.mark.parametrize("transformer_class", test_transformers) def test_set_params(transformer_class): transformer = transformer_class() - params = transformer.get_params() - #change extracted dictionary - params['fpSize'] = 4242 - #change params in transformer - transformer.set_params(fpSize = 4242) + params = transformer.get_params() + # change extracted dictionary + params["fpSize"] = 4242 + # change params in transformer + transformer.set_params(fpSize=4242) # get parameters as dictionary and assert that it is the same params_2 = transformer.get_params() - assert all([ params[key] == params_2[key] for key in params.keys()]) + assert all([params[key] == params_2[key] for key in params.keys()]) + @pytest.mark.parametrize("transformer_class", test_transformers) def test_transform(mols_container, transformer_class): transformer = transformer_class() - #Test the different transformers - params = transformer.get_params() + # Test the different transformers + params = transformer.get_params() fps = transformer.transform(mols_container) - #Assert that the same length of input and output + # Assert that the same length of input and output assert len(fps) == len(mols_container) - fpsize = params['fpSize'] + fpsize = params["fpSize"] assert len(fps[0]) == fpsize + @pytest.mark.parametrize("transformer_class", test_transformers) def test_transform_parallel(mols_container, transformer_class): transformer = transformer_class() - #Test the different transformers + # Test the different transformers transformer.set_params(parallel=True) - params = transformer.get_params() + params = transformer.get_params() fps = 
transformer.transform(mols_container) - #Assert that the same length of input and output + # Assert that the same length of input and output assert len(fps) == len(mols_container) - fpsize = params['fpSize'] + fpsize = params["fpSize"] assert len(fps[0]) == fpsize @pytest.mark.parametrize("transformer_class", test_transformers) def test_picklable(transformer_class): - #Test the different transformers + # Test the different transformers transformer = transformer_class() p = transformer.get_params() @@ -107,8 +124,8 @@ def test_picklable(transformer_class): print(p) print(vars(transformer)) print(vars(t2)) - assert(transformer.get_params() == t2.get_params()) - + assert transformer.get_params() == t2.get_params() + @pytest.mark.parametrize("transfomer", test_transformers) def assert_transformer_set_params(transfomer, new_params, mols_list): @@ -128,20 +145,36 @@ def assert_transformer_set_params(transfomer, new_params, mols_list): # Now fp_default should not be the same as fp_reset_params - assert ~np.all([np.array_equal(fp_default, fp_reset_params) for fp_default, fp_reset_params in zip(fps_default, fps_reset_params)]), f"Assertation error, FP appears the same, although the {key} should be changed from {default_params[key]} to {params[key]}" + assert ~np.all( + [ + np.array_equal(fp_default, fp_reset_params) + for fp_default, fp_reset_params in zip(fps_default, fps_reset_params) + ] + ), f"Assertation error, FP appears the same, although the {key} should be changed from {default_params[key]} to {params[key]}" # fp_reset_params and fp_init_new_params should however be the same - assert np.all([np.array_equal(fp_init_new_params, fp_reset_params) for fp_init_new_params, fp_reset_params in zip(fps_init_new_params, fps_reset_params)]) , f"Assertation error, FP appears to be different, although the {key} should be changed back as well as initialized to {params[key]}" + assert np.all( + [ + np.array_equal(fp_init_new_params, fp_reset_params) + for fp_init_new_params, 
fp_reset_params in zip( + fps_init_new_params, fps_reset_params + ) + ] + ), f"Assertation error, FP appears to be different, although the {key} should be changed back as well as initialized to {params[key]}" def test_morgan_set_params(chiral_mols_list): - new_params = {'fpSize': 1024, - 'radius': 1, - 'useBondTypes': False,# TODO, why doesn't this change the FP? - 'useChirality': True, - 'useCounts': True, - 'useFeatures': True} - - assert_transformer_set_params(MorganFPGeneratorTransformer, new_params, chiral_mols_list) + new_params = { + "fpSize": 1024, + "radius": 1, + "useBondTypes": False, # TODO, why doesn't this change the FP? + "useChirality": True, + "useCounts": True, + "useFeatures": True, + } + + assert_transformer_set_params( + MorganFPGeneratorTransformer, new_params, chiral_mols_list + ) def test_atompairs_set_params(chiral_mols_list): @@ -150,39 +183,48 @@ def test_atompairs_set_params(chiral_mols_list): #'confId': -1, #'fromAtoms': 1, #'ignoreAtoms': 0, - 'includeChirality': True, - 'maxLength': 3, - 'minLength': 3, - 'fpSize': 1024, + "includeChirality": True, + "maxLength": 3, + "minLength": 3, + "fpSize": 1024, #'nBitsPerEntry': 3, #Todo: not setable with the generators? #'use2D': True, #TODO, understand why this can't be set different - 'useCounts': True} - - assert_transformer_set_params(AtomPairFPGeneratorTransformer, new_params, chiral_mols_list) + "useCounts": True, + } + + assert_transformer_set_params( + AtomPairFPGeneratorTransformer, new_params, chiral_mols_list + ) def test_topologicaltorsion_set_params(chiral_mols_list): - new_params = {#'atomInvariants': 0, - #'fromAtoms': 0, - #'ignoreAtoms': 0, - #'includeChirality': True, #TODO, figure out why this setting seems to give same FP wheter toggled or not - 'fpSize': 1024, - #'nBitsPerEntry': 3, #Todo: not setable with the generators? 
- 'targetSize': 5, - 'useCounts': True} - - assert_transformer_set_params(TopologicalTorsionFPGeneatorTransformer, new_params, chiral_mols_list) + new_params = { #'atomInvariants': 0, + #'fromAtoms': 0, + #'ignoreAtoms': 0, + #'includeChirality': True, #TODO, figure out why this setting seems to give same FP wheter toggled or not + "fpSize": 1024, + #'nBitsPerEntry': 3, #Todo: not setable with the generators? + "targetSize": 5, + "useCounts": True, + } + + assert_transformer_set_params( + TopologicalTorsionFPGeneatorTransformer, new_params, chiral_mols_list + ) + def test_RDKitFPTransformer(chiral_mols_list): - new_params = {#'atomInvariantsGenerator': None, - #'branchedPaths': False, - #'countBounds': 0, #TODO: What does this do? - 'countSimulation': True, - 'fpSize': 1024, - 'maxPath': 3, - 'minPath': 2, - 'numBitsPerFeature': 3, - 'useBondOrder': False, #TODO, why doesn't this change the FP? - #'useHs': False, #TODO, why doesn't this change the FP? - } - assert_transformer_set_params(RDKitFPGeneratorTransformer, new_params, chiral_mols_list) + new_params = { #'atomInvariantsGenerator': None, + #'branchedPaths': False, + #'countBounds': 0, #TODO: What does this do? + "countSimulation": True, + "fpSize": 1024, + "maxPath": 3, + "minPath": 2, + "numBitsPerFeature": 3, + "useBondOrder": False, # TODO, why doesn't this change the FP? + #'useHs': False, #TODO, why doesn't this change the FP? 
+ } + assert_transformer_set_params( + RDKitFPGeneratorTransformer, new_params, chiral_mols_list + ) From ff8cf2eea6199d60ec4bee4e893d114d8d7add8c Mon Sep 17 00:00:00 2001 From: Esben Jannik Bjerrum Date: Fri, 22 Nov 2024 16:43:11 +0100 Subject: [PATCH 13/24] Updated child classes to honor the safe_inference_mode --- scikit_mol/fingerprints/atompair.py | 4 ++-- scikit_mol/fingerprints/baseclasses.py | 1 - scikit_mol/fingerprints/minhash.py | 2 ++ scikit_mol/fingerprints/morgan.py | 7 ++++++- scikit_mol/fingerprints/rdkitfp.py | 3 ++- scikit_mol/fingerprints/topologicaltorsion.py | 3 ++- 6 files changed, 14 insertions(+), 6 deletions(-) diff --git a/scikit_mol/fingerprints/atompair.py b/scikit_mol/fingerprints/atompair.py index aff8f9f..2198afd 100644 --- a/scikit_mol/fingerprints/atompair.py +++ b/scikit_mol/fingerprints/atompair.py @@ -100,9 +100,10 @@ def __init__( fpSize: int = 2048, useCounts: bool = False, parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, ): self._initializing = True - super().__init__(parallel=parallel) + super().__init__(parallel=parallel, safe_inference_mode=safe_inference_mode) self.fpSize = fpSize self.use2D = use2D self.includeChirality = includeChirality @@ -114,7 +115,6 @@ def __init__( self.fromAtoms = fromAtoms self.ignoreAtoms = ignoreAtoms self.atomInvariants = atomInvariants - self._generate_fp_generator() delattr(self, "_initializing") diff --git a/scikit_mol/fingerprints/baseclasses.py b/scikit_mol/fingerprints/baseclasses.py index 03ca11b..d1eef40 100644 --- a/scikit_mol/fingerprints/baseclasses.py +++ b/scikit_mol/fingerprints/baseclasses.py @@ -127,7 +127,6 @@ def _transform(self, X): if self.safe_inference_mode: # Use the new method with masked arrays if we're in safe inference mode arrays = [self._safe_transform_mol(mol) for mol in X] - print(arrays) return np.ma.stack(arrays) elif hasattr( self, "dtype" diff --git a/scikit_mol/fingerprints/minhash.py b/scikit_mol/fingerprints/minhash.py index 
1c7e62a..9d0ec31 100644 --- a/scikit_mol/fingerprints/minhash.py +++ b/scikit_mol/fingerprints/minhash.py @@ -9,6 +9,7 @@ from rdkit.Chem import rdMHFPFingerprint +# TODO move to use FpsGeneratorTransformer class MHFingerprintTransformer(FpsTransformer): def __init__( self, @@ -105,6 +106,7 @@ def n_permutations(self, n_permutations): self._recreate_encoder() +# TODO use FpsGeneratorTransformer instead class SECFingerprintTransformer(FpsTransformer): # https://jcheminf.biomedcentral.com/articles/10.1186/s13321-018-0321-8 def __init__( diff --git a/scikit_mol/fingerprints/morgan.py b/scikit_mol/fingerprints/morgan.py index 37d7cf8..f7d6067 100644 --- a/scikit_mol/fingerprints/morgan.py +++ b/scikit_mol/fingerprints/morgan.py @@ -98,6 +98,7 @@ def __init__( useFeatures=False, useCounts=False, parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, ): """Transform RDKit mols into Count or bit-based hashed MorganFingerprints @@ -115,10 +116,14 @@ def __init__( use chemical features, rather than atom-type in calculation of the fingerprint keys, by default False useCounts : bool, optional If toggled will create the count and not bit-based fingerprint, by default False + parallel : bool or int, optional + If True, will use all available cores, if int will use that many cores, by default False + safe_inference_mode : bool, optional + If True, will return masked arrays for invalid mols, by default False """ self._initializing = True - super().__init__(parallel=parallel) + super().__init__(parallel=parallel, safe_inference_mode=safe_inference_mode) self.fpSize = fpSize self.radius = radius self.useChirality = useChirality diff --git a/scikit_mol/fingerprints/rdkitfp.py b/scikit_mol/fingerprints/rdkitfp.py index 28ce0a8..ad87a26 100644 --- a/scikit_mol/fingerprints/rdkitfp.py +++ b/scikit_mol/fingerprints/rdkitfp.py @@ -114,6 +114,7 @@ def __init__( numBitsPerFeature: int = 2, useCounts: bool = False, parallel: Union[bool, int] = False, + safe_inference_mode: 
bool = False, ): """Calculates the RDKit fingerprints @@ -139,7 +140,7 @@ def __init__( the number of bits set per path/subgraph found, by default 2 """ self._initializing = True - super().__init__(parallel=parallel) + super().__init__(parallel=parallel, safe_inference_mode=safe_inference_mode) self.minPath = minPath self.maxPath = maxPath self.useHs = useHs diff --git a/scikit_mol/fingerprints/topologicaltorsion.py b/scikit_mol/fingerprints/topologicaltorsion.py index 0b6640d..63b68bf 100644 --- a/scikit_mol/fingerprints/topologicaltorsion.py +++ b/scikit_mol/fingerprints/topologicaltorsion.py @@ -80,9 +80,10 @@ def __init__( fpSize: int = 2048, useCounts: bool = False, parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, ): self._initializing = True - super().__init__(parallel=parallel) + super().__init__(parallel=parallel, safe_inference_mode=safe_inference_mode) self.fpSize = fpSize self.includeChirality = includeChirality self.targetSize = targetSize From 7d9941ad1630176ff884c31c67c57bfd5ac477cd Mon Sep 17 00:00:00 2001 From: Esben Jannik Bjerrum Date: Fri, 22 Nov 2024 20:45:14 +0100 Subject: [PATCH 14/24] working prototype for the soft transition to generator classes with proper deprecation warnings. 
Only implemented in the MorganFP so far --- scikit_mol/fingerprints/baseclasses.py | 57 ++++++++++++++++++++++---- scikit_mol/fingerprints/morgan.py | 4 ++ 2 files changed, 54 insertions(+), 7 deletions(-) diff --git a/scikit_mol/fingerprints/baseclasses.py b/scikit_mol/fingerprints/baseclasses.py index d1eef40..5ce7f00 100644 --- a/scikit_mol/fingerprints/baseclasses.py +++ b/scikit_mol/fingerprints/baseclasses.py @@ -2,7 +2,8 @@ import multiprocessing import re import inspect -from warnings import warn +from warnings import warn, simplefilter + from typing import Union from rdkit import DataStructs @@ -30,6 +31,7 @@ from abc import ABC, abstractmethod +simplefilter("always", DeprecationWarning) _PATTERN_FINGERPRINT_TRANSFORMER = re.compile( r"^(?P\w+)FingerprintTransformer$" @@ -47,21 +49,26 @@ def __init__( self.start_method = start_method self.safe_inference_mode = safe_inference_mode + # TODO, remove when finally deprecating nBits and dtype @property def nBits(self): warn( - "nBits will be replace by fpSize, due to changes harmonization!", + "nBits will be replaced by fpSize, due to changes harmonization!", DeprecationWarning, + stacklevel=2, ) return self.fpSize + # TODO, remove when finally deprecating nBits and dtype @nBits.setter def nBits(self, nBits): - warn( - "nBits will be replace by fpSize, due to changes harmonization!", - DeprecationWarning, - ) - self.fpSize = nBits + if nBits is not None: + warn( + "nBits will be replaced by fpSize, due to changes harmonization!", + DeprecationWarning, + stacklevel=3, + ) + self.fpSize = nBits def _get_column_prefix(self) -> str: matched = _PATTERN_FINGERPRINT_TRANSFORMER.match(type(self).__name__) @@ -229,6 +236,13 @@ def _fp2array(self, fp): else: return np.ma.masked_all((self.fpSize,), dtype=self.dtype) + # TODO, remove when finally deprecating nBits + def _get_param_names(self): + """Get parameter names excluding deprecated parameters""" + params = super()._get_param_names() + # Remove deprecated parameters 
before they're accessed + return [p for p in params if p not in ("nBits")] + class FpsGeneratorTransformer(BaseFpsTransformer): _regenerate_on_properties = () @@ -278,6 +292,35 @@ def _transform_mol(self, mol) -> np.array: """ raise NotImplementedError("_transform_mol not implemented") + # TODO, remove when finally deprecating nBits and dtype + @property + def dtype(self): + warn( + "dtype is no longer supported, due to move to generator based fingerprints", + DeprecationWarning, + stacklevel=2, + ) + return None + + # TODO, remove when finally deprecating nBits and dtype + @dtype.setter + def dtype(self, dtype): + if dtype is not None: + print("Tester") + warn( + "dtype is no longer supported, due to move to generator based fingerprints", + DeprecationWarning, + stacklevel=3, + ) + pass + + # TODO, remove when finally deprecating nBits and dtype + def _get_param_names(self): + """Get parameter names excluding deprecated parameters""" + params = super()._get_param_names() + # Remove deprecated parameters before they're accessed + return [p for p in params if p not in ("dtype", "nBits")] + def parallel_helper(args): """Parallel_helper takes a tuple with classname, the objects parameters and the mols to process. 
diff --git a/scikit_mol/fingerprints/morgan.py b/scikit_mol/fingerprints/morgan.py index f7d6067..8b7292f 100644 --- a/scikit_mol/fingerprints/morgan.py +++ b/scikit_mol/fingerprints/morgan.py @@ -99,6 +99,8 @@ def __init__( useCounts=False, parallel: Union[bool, int] = False, safe_inference_mode: bool = False, + dtype: np.dtype = None, + nBits: int = None, ): """Transform RDKit mols into Count or bit-based hashed MorganFingerprints @@ -130,6 +132,8 @@ def __init__( self.useFeatures = useFeatures self.useCounts = useCounts self.useBondTypes = useBondTypes + self.dtype = dtype + self.nBits = nBits self._generate_fp_generator() delattr(self, "_initializing") From d425b72968769f0a76905b62fe1fa8df27fb9a01 Mon Sep 17 00:00:00 2001 From: Esben Jannik Bjerrum Date: Sun, 24 Nov 2024 08:11:19 +0100 Subject: [PATCH 15/24] Minor fixes in baseclasses --- scikit_mol/fingerprints/baseclasses.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scikit_mol/fingerprints/baseclasses.py b/scikit_mol/fingerprints/baseclasses.py index 5ce7f00..8664630 100644 --- a/scikit_mol/fingerprints/baseclasses.py +++ b/scikit_mol/fingerprints/baseclasses.py @@ -245,6 +245,8 @@ def _get_param_names(self): class FpsGeneratorTransformer(BaseFpsTransformer): + """Abstract base class for fingerprint transformers based on (unpicklable)fingerprint generators""" + _regenerate_on_properties = () def __getstate__(self): @@ -286,7 +288,7 @@ def _generate_fp_generator(self): @abstractmethod def _transform_mol(self, mol) -> np.array: - """Generate numpy array descriptor from mol + """Generate numpy array descriptor from RDKit molecule MUST BE OVERWRITTEN """ @@ -306,7 +308,6 @@ def dtype(self): @dtype.setter def dtype(self, dtype): if dtype is not None: - print("Tester") warn( "dtype is no longer supported, due to move to generator based fingerprints", DeprecationWarning, From f73558f5295fc58b726d9105bb5d8290e701497a Mon Sep 17 00:00:00 2001 From: Esben Jannik Bjerrum Date: Sun, 24 Nov 
2024 08:16:52 +0100 Subject: [PATCH 16/24] Fixed exotic types --- scikit_mol/fingerprints/morgan.py | 89 +++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 5 deletions(-) diff --git a/scikit_mol/fingerprints/morgan.py b/scikit_mol/fingerprints/morgan.py index 8b7292f..6ddb6b9 100644 --- a/scikit_mol/fingerprints/morgan.py +++ b/scikit_mol/fingerprints/morgan.py @@ -14,7 +14,7 @@ from .baseclasses import FpsTransformer, FpsGeneratorTransformer -class MorganFingerprintTransformer(FpsTransformer): +class MorganFingerprintTransformerClassic(FpsTransformer): def __init__( self, fpSize=2048, @@ -80,6 +80,85 @@ def _mol2fp(self, mol): ) +class MorganFingerprintTransformer(FpsGeneratorTransformer): + _regenerate_on_properties = ( + "radius", + "fpSize", + "useChirality", + "useFeatures", + "useBondTypes", + ) + + def __init__( + self, + fpSize=2048, + radius=2, + useChirality=False, + useBondTypes=True, + useFeatures=False, + useCounts=False, + parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, + dtype: np.dtype = None, + nBits: int = None, + ): + """Transform RDKit mols into Count or bit-based hashed MorganFingerprints + + Parameters + ---------- + fpsize : int, optional + Size of the hashed fingerprint, by default 2048 + radius : int, optional + Radius of the fingerprint, by default 2 + useChirality : bool, optional + Include chirality in calculation of the fingerprint keys, by default False + useBondTypes : bool, optional + Include bondtypes in calculation of the fingerprint keys, by default True + useFeatures : bool, optional + use chemical features, rather than atom-type in calculation of the fingerprint keys, by default False + useCounts : bool, optional + If toggled will create the count and not bit-based fingerprint, by default False + parallel : bool or int, optional + If True, will use all available cores, if int will use that many cores, by default False + safe_inference_mode : bool, optional + If True, will return masked 
arrays for invalid mols, by default False + """ + + self._initializing = True + super().__init__(parallel=parallel, safe_inference_mode=safe_inference_mode) + self.fpSize = fpSize + self.radius = radius + self.useChirality = useChirality + self.useFeatures = useFeatures + self.useCounts = useCounts + self.useBondTypes = useBondTypes + self.dtype = dtype + self.nBits = nBits + + self._generate_fp_generator() + delattr(self, "_initializing") + + def _generate_fp_generator(self): + if self.useFeatures: + atomInvariantsGenerator = GetMorganFeatureAtomInvGen() + else: + atomInvariantsGenerator = None + + self._fpgen = GetMorganGenerator( + radius=int(self.radius), + fpSize=int(self.fpSize), + includeChirality=bool(self.useChirality), + useBondTypes=bool(self.useBondTypes), + atomInvariantsGenerator=atomInvariantsGenerator, + ) + + def _transform_mol(self, mol) -> np.array: + if self.useCounts: + return self._fpgen.GetCountFingerprintAsNumPy(mol) + else: + return self._fpgen.GetFingerprintAsNumPy(mol) + + class MorganFPGeneratorTransformer(FpsGeneratorTransformer): _regenerate_on_properties = ( "radius", @@ -145,10 +224,10 @@ def _generate_fp_generator(self): atomInvariantsGenerator = None self._fpgen = GetMorganGenerator( - radius=self.radius, - fpSize=self.fpSize, - includeChirality=self.useChirality, - useBondTypes=self.useBondTypes, + radius=int(self.radius), + fpSize=int(self.fpSize), + includeChirality=bool(self.useChirality), + useBondTypes=bool(self.useBondTypes), atomInvariantsGenerator=atomInvariantsGenerator, ) From 96b5b4211e79d399ea027e9b462c61b5f76584b8 Mon Sep 17 00:00:00 2001 From: Esben Jannik Bjerrum Date: Sun, 24 Nov 2024 08:23:06 +0100 Subject: [PATCH 17/24] Fixed test to use new baseclass --- tests/test_transformers.py | 227 +++++++++++++++++++++++++------------ 1 file changed, 156 insertions(+), 71 deletions(-) diff --git a/tests/test_transformers.py b/tests/test_transformers.py index b96d421..9352441 100644 --- a/tests/test_transformers.py +++ 
b/tests/test_transformers.py @@ -15,14 +15,33 @@ from sklearn.ensemble import RandomForestRegressor from scikit_mol.conversions import SmilesToMolTransformer from scikit_mol.core import SKLEARN_VERSION_PANDAS_OUT -from scikit_mol.fingerprints import (FpsTransformer, MACCSKeysFingerprintTransformer, RDKitFingerprintTransformer, AtomPairFingerprintTransformer, - TopologicalTorsionFingerprintTransformer, MorganFingerprintTransformer, SECFingerprintTransformer, - MHFingerprintTransformer, AvalonFingerprintTransformer, MorganFPGeneratorTransformer, - RDKitFPGeneratorTransformer, AtomPairFPGeneratorTransformer, TopologicalTorsionFPGeneatorTransformer) +from scikit_mol.fingerprints import ( + MACCSKeysFingerprintTransformer, + RDKitFingerprintTransformer, + AtomPairFingerprintTransformer, + TopologicalTorsionFingerprintTransformer, + MorganFingerprintTransformer, + SECFingerprintTransformer, + MHFingerprintTransformer, + AvalonFingerprintTransformer, + MorganFPGeneratorTransformer, + RDKitFPGeneratorTransformer, + AtomPairFPGeneratorTransformer, + TopologicalTorsionFPGeneatorTransformer, +) +from scikit_mol.fingerprints.baseclasses import BaseFpsTransformer from scikit_mol.descriptors import MolecularDescriptorTransformer -from fixtures import SLC6A4_subset, SLC6A4_subset_with_cddd, skip_pandas_output_test, mols_container, featurizer, combined_transformer +from fixtures import ( + SLC6A4_subset, + SLC6A4_subset_with_cddd, + skip_pandas_output_test, + mols_container, + featurizer, + combined_transformer, +) + def test_transformer(SLC6A4_subset): # load some toy data for quick testing on a small number of samples @@ -31,45 +50,83 @@ def test_transformer(SLC6A4_subset): X_train, X_test = X_smiles[:128], X_smiles[128:] Y_train, Y_test = Y[:128], Y[128:] - MorganFPGeneratorTransformer, - RDKitFPGeneratorTransformer, AtomPairFPGeneratorTransformer, TopologicalTorsionFPGeneatorTransformer + (MorganFPGeneratorTransformer,) + ( + RDKitFPGeneratorTransformer, + 
AtomPairFPGeneratorTransformer, + TopologicalTorsionFPGeneatorTransformer, + ) # run FP with default parameters except when useCounts can be given as an argument - FP_dict = {"MACCSTransformer": [MACCSKeysFingerprintTransformer, None], - "RDKitFPTransformer": [RDKitFingerprintTransformer, None], - "AtomPairFingerprintTransformer": [AtomPairFingerprintTransformer, False], - "AtomPairFingerprintTransformer useCounts": [AtomPairFingerprintTransformer, True], - "TopologicalTorsionFingerprintTransformer": [TopologicalTorsionFingerprintTransformer, False], - "TopologicalTorsionFingerprintTransformer useCounts": [TopologicalTorsionFingerprintTransformer, True], - "MorganTransformer": [MorganFingerprintTransformer, False], - "MorganTransformer useCounts": [MorganFingerprintTransformer, True], - "SECFingerprintTransformer": [SECFingerprintTransformer, None], - "MHFingerprintTransformer": [MHFingerprintTransformer, None], - 'AvalonFingerprintTransformer': [AvalonFingerprintTransformer, None], - 'MorganFPGeneratorTransformer': [MorganFPGeneratorTransformer, True], - 'MorganFPGeneratorTransformer': [MorganFPGeneratorTransformer, False], - 'RDKitFPGeneratorTransformer': [RDKitFPGeneratorTransformer, None], - 'AtomPairFPGeneratorTransformer': [AtomPairFPGeneratorTransformer, True], - 'AtomPairFPGeneratorTransformer': [ AtomPairFPGeneratorTransformer, False], - 'TopologicalTorsionFPGeneatorTransformer': [TopologicalTorsionFPGeneatorTransformer, True], - 'TopologicalTorsionFPGeneatorTransformer': [ TopologicalTorsionFPGeneatorTransformer, False], - } + FP_dict = { + "MACCSTransformer": [MACCSKeysFingerprintTransformer, None], + "RDKitFPTransformer": [RDKitFingerprintTransformer, None], + "AtomPairFingerprintTransformer": [AtomPairFingerprintTransformer, False], + "AtomPairFingerprintTransformer useCounts": [ + AtomPairFingerprintTransformer, + True, + ], + "TopologicalTorsionFingerprintTransformer": [ + TopologicalTorsionFingerprintTransformer, + False, + ], + 
"TopologicalTorsionFingerprintTransformer useCounts": [ + TopologicalTorsionFingerprintTransformer, + True, + ], + "MorganTransformer": [MorganFingerprintTransformer, False], + "MorganTransformer useCounts": [MorganFingerprintTransformer, True], + "SECFingerprintTransformer": [SECFingerprintTransformer, None], + "MHFingerprintTransformer": [MHFingerprintTransformer, None], + "AvalonFingerprintTransformer": [AvalonFingerprintTransformer, None], + "MorganFPGeneratorTransformer": [MorganFPGeneratorTransformer, True], + "MorganFPGeneratorTransformer": [MorganFPGeneratorTransformer, False], + "RDKitFPGeneratorTransformer": [RDKitFPGeneratorTransformer, None], + "AtomPairFPGeneratorTransformer": [AtomPairFPGeneratorTransformer, True], + "AtomPairFPGeneratorTransformer": [AtomPairFPGeneratorTransformer, False], + "TopologicalTorsionFPGeneatorTransformer": [ + TopologicalTorsionFPGeneatorTransformer, + True, + ], + "TopologicalTorsionFPGeneatorTransformer": [ + TopologicalTorsionFPGeneatorTransformer, + False, + ], + } # fit on toy data and print train/test score if successful or collect the failed FP failed_FP = [] for FP_name, (FP, useCounts) in FP_dict.items(): try: - print(f"\nrunning pipeline fitting and scoring for {FP_name} with useCounts={useCounts}") + print( + f"\nrunning pipeline fitting and scoring for {FP_name} with useCounts={useCounts}" + ) if useCounts is None: - pipeline = Pipeline([("s2m", SmilesToMolTransformer()), ("FP", FP()), ("RF", RandomForestRegressor())]) + pipeline = Pipeline( + [ + ("s2m", SmilesToMolTransformer()), + ("FP", FP()), + ("RF", RandomForestRegressor()), + ] + ) else: - pipeline = Pipeline([("s2m", SmilesToMolTransformer()), ("FP", FP(useCounts=useCounts)), ("RF", RandomForestRegressor())]) + pipeline = Pipeline( + [ + ("s2m", SmilesToMolTransformer()), + ("FP", FP(useCounts=useCounts)), + ("RF", RandomForestRegressor()), + ] + ) pipeline.fit(X_train, Y_train) train_score = pipeline.score(X_train, Y_train) test_score = 
pipeline.score(X_test, Y_test) - print(f"\nfitting and scoring completed train_score={train_score}, test_score={test_score}") + print( + f"\nfitting and scoring completed train_score={train_score}, test_score={test_score}" + ) except: - print(f"\n!!!! FAILED pipeline fitting and scoring for {FP_name} with useCounts={useCounts}") + print( + f"\n!!!! FAILED pipeline fitting and scoring for {FP_name} with useCounts={useCounts}" + ) failed_FP.append(FP_name) pass @@ -84,57 +141,81 @@ def test_transformer_pandas_output(SLC6A4_subset, pandas_output): X_smiles = X_smiles.to_frame() # run FP with default parameters except when useCounts can be given as an argument - FP_dict = {"MACCSTransformer": [MACCSKeysFingerprintTransformer, None], - "RDKitFPTransformer": [RDKitFingerprintTransformer, None], - "AtomPairFingerprintTransformer": [AtomPairFingerprintTransformer, False], - "AtomPairFingerprintTransformer useCounts": [AtomPairFingerprintTransformer, True], - "TopologicalTorsionFingerprintTransformer": [TopologicalTorsionFingerprintTransformer, False], - "TopologicalTorsionFingerprintTransformer useCounts": [TopologicalTorsionFingerprintTransformer, True], - "MorganTransformer": [MorganFingerprintTransformer, False], - "MorganTransformer useCounts": [MorganFingerprintTransformer, True], - "SECFingerprintTransformer": [SECFingerprintTransformer, None], - "MHFingerprintTransformer": [MHFingerprintTransformer, None], - 'AvalonFingerprintTransformer': [AvalonFingerprintTransformer, None], - 'MorganFPGeneratorTransformer': [MorganFPGeneratorTransformer, - True], - 'MorganFPGeneratorTransformer': [MorganFPGeneratorTransformer, - False], - 'RDKitFPGeneratorTransformer': [RDKitFPGeneratorTransformer, - None], - 'AtomPairFPGeneratorTransformer': [ - AtomPairFPGeneratorTransformer, True], - 'AtomPairFPGeneratorTransformer': [ - AtomPairFPGeneratorTransformer, False], - 'TopologicalTorsionFPGeneatorTransformer': [ - TopologicalTorsionFPGeneatorTransformer, True], - 
'TopologicalTorsionFPGeneatorTransformer': [ - TopologicalTorsionFPGeneatorTransformer, False], - } + FP_dict = { + "MACCSTransformer": [MACCSKeysFingerprintTransformer, None], + "RDKitFPTransformer": [RDKitFingerprintTransformer, None], + "AtomPairFingerprintTransformer": [AtomPairFingerprintTransformer, False], + "AtomPairFingerprintTransformer useCounts": [ + AtomPairFingerprintTransformer, + True, + ], + "TopologicalTorsionFingerprintTransformer": [ + TopologicalTorsionFingerprintTransformer, + False, + ], + "TopologicalTorsionFingerprintTransformer useCounts": [ + TopologicalTorsionFingerprintTransformer, + True, + ], + "MorganTransformer": [MorganFingerprintTransformer, False], + "MorganTransformer useCounts": [MorganFingerprintTransformer, True], + "SECFingerprintTransformer": [SECFingerprintTransformer, None], + "MHFingerprintTransformer": [MHFingerprintTransformer, None], + "AvalonFingerprintTransformer": [AvalonFingerprintTransformer, None], + "MorganFPGeneratorTransformer": [MorganFPGeneratorTransformer, True], + "MorganFPGeneratorTransformer": [MorganFPGeneratorTransformer, False], + "RDKitFPGeneratorTransformer": [RDKitFPGeneratorTransformer, None], + "AtomPairFPGeneratorTransformer": [AtomPairFPGeneratorTransformer, True], + "AtomPairFPGeneratorTransformer": [AtomPairFPGeneratorTransformer, False], + "TopologicalTorsionFPGeneatorTransformer": [ + TopologicalTorsionFPGeneatorTransformer, + True, + ], + "TopologicalTorsionFPGeneatorTransformer": [ + TopologicalTorsionFPGeneatorTransformer, + False, + ], + } # fit on toy data and check that the output is a pandas dataframe failed_FP = [] for FP_name, (FP, useCounts) in FP_dict.items(): try: - print(f"\nrunning pipeline fitting and scoring for {FP_name} with useCounts={useCounts}") + print( + f"\nrunning pipeline fitting and scoring for {FP_name} with useCounts={useCounts}" + ) if useCounts is None: pipeline = Pipeline([("s2m", SmilesToMolTransformer()), ("FP", FP())]) else: - pipeline = Pipeline([("s2m", 
SmilesToMolTransformer()), ("FP", FP(useCounts=useCounts))]) + pipeline = Pipeline( + [("s2m", SmilesToMolTransformer()), ("FP", FP(useCounts=useCounts))] + ) pipeline.fit(X_smiles) X_transformed = pipeline.transform(X_smiles) - assert isinstance(X_transformed, pd.DataFrame), f"the output of {FP_name} is not a pandas dataframe" - assert X_transformed.shape[0] == len(X_smiles), f"the number of rows in the output of {FP_name} is not equal to the number of samples" - assert len(X_transformed.columns) == pipeline.named_steps["FP"].fpSize, f"the number of columns in the output of {FP_name} is not equal to the number of bits" + assert isinstance( + X_transformed, pd.DataFrame + ), f"the output of {FP_name} is not a pandas dataframe" + assert ( + X_transformed.shape[0] == len(X_smiles) + ), f"the number of rows in the output of {FP_name} is not equal to the number of samples" + assert ( + len(X_transformed.columns) == pipeline.named_steps["FP"].fpSize + ), f"the number of columns in the output of {FP_name} is not equal to the number of bits" print(f"\nfitting and transforming completed") except Exception as err: - print(f"\n!!!! FAILED pipeline fitting and transforming for {FP_name} with useCounts={useCounts}") + print( + f"\n!!!! 
FAILED pipeline fitting and transforming for {FP_name} with useCounts={useCounts}" + ) print("\n".join(err.args)) failed_FP.append(FP_name) pass # overall result - assert len(failed_FP) == 0, f"the following FP have failed pandas transformation {failed_FP}" + assert ( + len(failed_FP) == 0 + ), f"the following FP have failed pandas transformation {failed_FP}" + @skip_pandas_output_test def test_pandas_out_same_values(featurizer, mols_container): @@ -149,22 +230,29 @@ def test_pandas_out_same_values(featurizer, mols_container): assert result_default.shape == result_pandas.shape featurizer_class_with_nan = MolecularDescriptorTransformer if isinstance(featurizer, featurizer_class_with_nan): - assert (pd.isna(result_default) == pd.isna(result_pandas.values)).all(), "NaN values are not in the same positions in the default and pandas output" - nan_replacement = 0. + assert ( + pd.isna(result_default) == pd.isna(result_pandas.values) + ).all(), ( + "NaN values are not in the same positions in the default and pandas output" + ) + nan_replacement = 0.0 result_default = np.nan_to_num(result_default, nan=nan_replacement) result_pandas = result_pandas.fillna(nan_replacement) else: assert (result_default == result_pandas.values).all() + @skip_pandas_output_test -def test_combined_transformer_pandas_out(combined_transformer, SLC6A4_subset_with_cddd, pandas_output): +def test_combined_transformer_pandas_out( + combined_transformer, SLC6A4_subset_with_cddd, pandas_output +): result = combined_transformer.fit_transform(SLC6A4_subset_with_cddd) assert isinstance(result, pd.DataFrame) assert result.shape[0] == SLC6A4_subset_with_cddd.shape[0] n_cddd_features = SLC6A4_subset_with_cddd.columns.str.match(r"^cddd_\d+$").sum() pipeline_skmol = combined_transformer.named_transformers_["pipeline-1"] featurizer_skmol = pipeline_skmol[-1] - if isinstance(featurizer_skmol, FpsTransformer): + if isinstance(featurizer_skmol, BaseFpsTransformer): n_skmol_features = featurizer_skmol.fpSize elif 
isinstance(featurizer_skmol, MolecularDescriptorTransformer): n_skmol_features = len(featurizer_skmol.desc_list) @@ -172,6 +260,3 @@ def test_combined_transformer_pandas_out(combined_transformer, SLC6A4_subset_wit raise ValueError(f"Unexpected featurizer type {type(featurizer_skmol)}") expected_n_features = n_cddd_features + n_skmol_features assert result.shape[1] == expected_n_features - - - From b8346f63f08c91be10b5e88ff332b790d0ac3773 Mon Sep 17 00:00:00 2001 From: Esben Jannik Bjerrum Date: Sun, 24 Nov 2024 09:08:34 +0100 Subject: [PATCH 18/24] Fixed the tests to move from classic to generator for morgan. Not the most flexible testing setup tbh --- tests/fixtures.py | 5 +++ tests/test_fptransformers.py | 62 +++++++++++++-------------- tests/test_fptransformersgenerator.py | 6 ++- 3 files changed, 39 insertions(+), 34 deletions(-) diff --git a/tests/fixtures.py b/tests/fixtures.py index 2b5a2e6..c3f392c 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -197,3 +197,8 @@ def combined_transformer(featurizer): remainder="drop", ) return transformer + + +@pytest.fixture +def morgan_transformer(): + return MorganFingerprintTransformer() diff --git a/tests/test_fptransformers.py b/tests/test_fptransformers.py index 4ad1e9d..debf9ef 100644 --- a/tests/test_fptransformers.py +++ b/tests/test_fptransformers.py @@ -29,11 +29,6 @@ ) -@pytest.fixture -def morgan_transformer(): - return MorganFingerprintTransformer() - - @pytest.fixture def rdkit_transformer(): return RDKitFingerprintTransformer() @@ -69,25 +64,26 @@ def avalon_transformer(): return AvalonFingerprintTransformer() -def test_fpstransformer_fp2array(morgan_transformer, fingerprint): - fp = morgan_transformer._fp2array(fingerprint) - # See that fp is the correct type, shape and bit count - assert type(fp) == type(np.array([0])) - assert fp.shape == (1000,) - assert fp.sum() == 25 +# morgan is no longer a fptransformer but a generator transformer, but why was this the only one to be tested here? 
+# def test_fpstransformer_fp2array(morgan_transformer, fingerprint): +# fp = morgan_transformer._fp2array(fingerprint) +# # See that fp is the correct type, shape and bit count +# assert type(fp) == type(np.array([0])) +# assert fp.shape == (1000,) +# assert fp.sum() == 25 -def test_fpstransformer_transform_mol(morgan_transformer, mols_list): - fp = morgan_transformer._transform_mol(mols_list[0]) - # See that fp is the correct type, shape and bit count - assert type(fp) == type(np.array([0])) - assert fp.shape == (2048,) - assert fp.sum() == 14 +# def test_fpstransformer_transform_mol(morgan_transformer, mols_list): +# fp = morgan_transformer._transform_mol(mols_list[0]) +# # See that fp is the correct type, shape and bit count +# assert type(fp) == type(np.array([0])) +# assert fp.shape == (2048,) +# assert fp.sum() == 14 def test_clonability( maccs_transformer, - morgan_transformer, + # morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, @@ -97,7 +93,7 @@ def test_clonability( ): for t in [ maccs_transformer, - morgan_transformer, + # morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, @@ -115,7 +111,7 @@ def test_clonability( def test_set_params( - morgan_transformer, + # morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, @@ -124,7 +120,7 @@ def test_set_params( avalon_transformer, ): for t in [ - morgan_transformer, + # morgan_transformer, atompair_transformer, topologicaltorsion_transformer, avalon_transformer, @@ -148,7 +144,7 @@ def test_set_params( def test_transform( mols_container, - morgan_transformer, + # morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, @@ -159,7 +155,7 @@ def test_transform( ): # Test the different transformers for t in [ - morgan_transformer, + # morgan_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, @@ -182,7 +178,7 @@ def 
test_transform( def test_transform_parallel( mols_container, - morgan_transformer, + # morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, @@ -193,7 +189,7 @@ def test_transform_parallel( ): # Test the different transformers for t in [ - morgan_transformer, + # morgan_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, @@ -215,7 +211,7 @@ def test_transform_parallel( def test_picklable( - morgan_transformer, + # morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, @@ -225,7 +221,7 @@ def test_picklable( ): # Test the different transformers for t in [ - morgan_transformer, + # morgan_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, @@ -386,7 +382,7 @@ def test_AvalonFingerprintTransformer(chiral_mols_list): def test_transform_with_safe_inference_mode( mols_with_invalid_container, - morgan_transformer, + # morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, @@ -395,7 +391,7 @@ def test_transform_with_safe_inference_mode( avalon_transformer, ): for t in [ - morgan_transformer, + # morgan_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, @@ -418,7 +414,7 @@ def test_transform_with_safe_inference_mode( def test_transform_without_safe_inference_mode( mols_with_invalid_container, - morgan_transformer, + # morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, @@ -428,7 +424,7 @@ def test_transform_without_safe_inference_mode( # MHFP seem to accept invalid mols and return 0,0,0,0's ): for t in [ - morgan_transformer, + # morgan_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, @@ -447,7 +443,7 @@ def test_transform_without_safe_inference_mode( # Add this test to check parallel processing with error handling def test_transform_parallel_with_safe_inference_mode( 
mols_with_invalid_container, - morgan_transformer, + # morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, @@ -456,7 +452,7 @@ def test_transform_parallel_with_safe_inference_mode( avalon_transformer, ): for t in [ - morgan_transformer, + # morgan_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, diff --git a/tests/test_fptransformersgenerator.py b/tests/test_fptransformersgenerator.py index c61f6e9..f35771b 100644 --- a/tests/test_fptransformersgenerator.py +++ b/tests/test_fptransformersgenerator.py @@ -15,12 +15,14 @@ from scikit_mol.fingerprints import ( MorganFPGeneratorTransformer, + MorganFingerprintTransformer, RDKitFPGeneratorTransformer, AtomPairFPGeneratorTransformer, TopologicalTorsionFPGeneatorTransformer, ) test_transformers = [ + MorganFingerprintTransformer, MorganFPGeneratorTransformer, RDKitFPGeneratorTransformer, AtomPairFPGeneratorTransformer, @@ -53,8 +55,10 @@ def test_fpstransformer_transform_mol(transformer_class, mols_list): assert fp.sum() == 12 elif isinstance(transformer, MorganFPGeneratorTransformer): assert fp.sum() == 14 + elif isinstance(transformer, MorganFingerprintTransformer): + assert fp.sum() == 14 else: - raise NotImplementedError("missing Assert") + raise NotImplementedError(f"missing Assert for {transformer_class}") @pytest.mark.parametrize("transformer_class", test_transformers) From d9b8eb711267c92e789efd119c543a911fc64533 Mon Sep 17 00:00:00 2001 From: Esben Jannik Bjerrum Date: Sun, 24 Nov 2024 09:11:48 +0100 Subject: [PATCH 19/24] Minor changes to tests --- scikit_mol/fingerprints/baseclasses.py | 1 + tests/test_parameter_types.py | 68 ++++++++++++++++---------- 2 files changed, 42 insertions(+), 27 deletions(-) diff --git a/scikit_mol/fingerprints/baseclasses.py b/scikit_mol/fingerprints/baseclasses.py index 8664630..30bfc5c 100644 --- a/scikit_mol/fingerprints/baseclasses.py +++ b/scikit_mol/fingerprints/baseclasses.py @@ -139,6 +139,7 @@ def 
_transform(self, X): self, "dtype" ): # TODO, it seems a bit of a code smell that we have to preemptively test a property from the baseclass? # Use the original, faster method if we're not in safe inference mode + # This also triggers a deprecation warning! arr = np.zeros((len(X), self.fpSize), dtype=self.dtype) for i, mol in enumerate(X): arr[i, :] = self._transform_mol(mol) diff --git a/tests/test_parameter_types.py b/tests/test_parameter_types.py index f175c87..ceea00c 100644 --- a/tests/test_parameter_types.py +++ b/tests/test_parameter_types.py @@ -1,46 +1,60 @@ import pytest import numpy as np from rdkit import Chem -from fixtures import mols_list, smiles_list -from test_fptransformers import morgan_transformer, atompair_transformer, topologicaltorsion_transformer, rdkit_transformer, avalon_transformer - - -def test_Transformer_exotic_types(mols_list, morgan_transformer,atompair_transformer, topologicaltorsion_transformer, avalon_transformer): - for transformer in [morgan_transformer, atompair_transformer, topologicaltorsion_transformer, avalon_transformer]: +from fixtures import mols_list, smiles_list, morgan_transformer +from test_fptransformers import ( + atompair_transformer, + topologicaltorsion_transformer, + rdkit_transformer, + avalon_transformer, +) + + +def test_Transformer_exotic_types( + mols_list, + morgan_transformer, + atompair_transformer, + topologicaltorsion_transformer, + avalon_transformer, +): + for transformer in [ + morgan_transformer, + atompair_transformer, + topologicaltorsion_transformer, + avalon_transformer, + ]: params = transformer.get_params() for useCounts in [np.bool_(True), np.bool_(False)]: - for key, value in params.items(): if isinstance(value, int): exotic_type_value = np.int64(value) elif isinstance(value, bool): exotic_type_value = np.bool_(value) else: - print(f'{key}:{value}:{type(value)}') + print(f"{key}:{value}:{type(value)}") exotic_type_value = value - exotic_params = {key:exotic_type_value, 
'useCounts':useCounts} - print(exotic_params) + exotic_params = {key: exotic_type_value, "useCounts": useCounts} + print(exotic_params) transformer.set_params(**exotic_params) transformer.transform(mols_list) def test_RDKFp_exotic_types(mols_list, rdkit_transformer): - transformer = rdkit_transformer - params = transformer.get_params() - - for key, value in params.items(): - if isinstance(value, int): - exotic_type_value = np.int64(value) - elif isinstance(value, bool): - exotic_type_value = np.bool_(value) - else: - print(f'{key}:{value}:{type(value)}') - exotic_type_value = value - - exotic_params = {key:exotic_type_value} - print(exotic_params) - transformer.set_params(**exotic_params) - transformer.transform(mols_list) - + transformer = rdkit_transformer + params = transformer.get_params() + + for key, value in params.items(): + if isinstance(value, int): + exotic_type_value = np.int64(value) + elif isinstance(value, bool): + exotic_type_value = np.bool_(value) + else: + print(f"{key}:{value}:{type(value)}") + exotic_type_value = value + + exotic_params = {key: exotic_type_value} + print(exotic_params) + transformer.set_params(**exotic_params) + transformer.transform(mols_list) From 646808fd1abef6615b8da04e64b761c53ffb179e Mon Sep 17 00:00:00 2001 From: Esben Jannik Bjerrum Date: Sun, 24 Nov 2024 09:40:09 +0100 Subject: [PATCH 20/24] Updated test to reflect change to generator subclasses --- scikit_mol/fingerprints/atompair.py | 68 ++++++- scikit_mol/fingerprints/rdkitfp.py | 90 ++++++++- scikit_mol/fingerprints/topologicaltorsion.py | 57 +++++- tests/fixtures.py | 15 ++ tests/test_fptransformers.py | 189 +++++------------- tests/test_fptransformersgenerator.py | 40 ++-- tests/test_parameter_types.py | 8 +- 7 files changed, 300 insertions(+), 167 deletions(-) diff --git a/scikit_mol/fingerprints/atompair.py b/scikit_mol/fingerprints/atompair.py index 2198afd..f468575 100644 --- a/scikit_mol/fingerprints/atompair.py +++ b/scikit_mol/fingerprints/atompair.py @@ 
-10,7 +10,7 @@ from rdkit.Chem import rdMolDescriptors -class AtomPairFingerprintTransformer(FpsTransformer): +class AtomPairFingerprintTransformerClassic(FpsTransformer): def __init__( self, minLength: int = 1, @@ -78,6 +78,72 @@ def _mol2fp(self, mol): ) +class AtomPairFingerprintTransformer(FpsGeneratorTransformer): + _regenerate_on_properties = ( + "fpSize", + "includeChirality", + "use2D", + "minLength", + "maxLength", + ) + + def __init__( + self, + minLength: int = 1, + maxLength: int = 30, + fromAtoms=None, + ignoreAtoms=None, + atomInvariants=None, + includeChirality: bool = False, + use2D: bool = True, + confId: int = -1, + fpSize: int = 2048, + useCounts: bool = False, + parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, + ): + self._initializing = True + super().__init__(parallel=parallel, safe_inference_mode=safe_inference_mode) + self.fpSize = fpSize + self.use2D = use2D + self.includeChirality = includeChirality + self.minLength = minLength + self.maxLength = maxLength + + self.useCounts = useCounts + self.confId = confId + self.fromAtoms = fromAtoms + self.ignoreAtoms = ignoreAtoms + self.atomInvariants = atomInvariants + self._generate_fp_generator() + delattr(self, "_initializing") + + def _generate_fp_generator(self): + self._fpgen = GetAtomPairGenerator( + minDistance=int(self.minLength), + maxDistance=int(self.maxLength), + includeChirality=bool(self.includeChirality), + use2D=bool(self.use2D), + fpSize=int(self.fpSize), + ) + + def _transform_mol(self, mol) -> np.array: + if self.useCounts: + return self._fpgen.GetCountFingerprintAsNumPy( + mol, + fromAtoms=self.fromAtoms, + ignoreAtoms=self.ignoreAtoms, + customAtomInvariants=self.atomInvariants, + ) + else: + return self._fpgen.GetFingerprintAsNumPy( + mol, + fromAtoms=self.fromAtoms, + ignoreAtoms=self.ignoreAtoms, + customAtomInvariants=self.atomInvariants, + ) + + class AtomPairFPGeneratorTransformer(FpsGeneratorTransformer): _regenerate_on_properties = ( "fpSize", 
diff --git a/scikit_mol/fingerprints/rdkitfp.py b/scikit_mol/fingerprints/rdkitfp.py index ad87a26..13d9a27 100644 --- a/scikit_mol/fingerprints/rdkitfp.py +++ b/scikit_mol/fingerprints/rdkitfp.py @@ -11,7 +11,7 @@ from rdkit.Chem import rdFingerprintGenerator -class RDKitFingerprintTransformer(FpsTransformer): +class RDKitFingerprintTransformerClassic(FpsTransformer): def __init__( self, minPath: int = 1, @@ -88,6 +88,94 @@ def _mol2fp(self, mol): return generator.GetFingerprint(mol) +class RDKitFingerprintTransformer(FpsGeneratorTransformer): + _regenerate_on_properties = ( + "minPath", + "maxPath", + "useHs", + "branchedPaths", + "useBondOrder", + "countSimulation", + "fpSize", + "countBounds", + "numBitsPerFeature", + ) + + def __init__( + self, + minPath: int = 1, + maxPath: int = 7, + useHs: bool = True, + branchedPaths: bool = True, + useBondOrder: bool = True, + countSimulation: bool = False, + countBounds=None, + fpSize: int = 2048, + numBitsPerFeature: int = 2, + useCounts: bool = False, + parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, + ): + """Calculates the RDKit fingerprints + + Parameters + ---------- + minPath : int, optional + the minimum path length (in bonds) to be included, by default 1 + maxPath : int, optional + the maximum path length (in bonds) to be included, by default 7 + useHs : bool, optional + toggles inclusion of Hs in paths (if the molecule has explicit Hs), by default True + branchedPaths : bool, optional + toggles generation of branched subgraphs, not just linear paths, by default True + useBondOrder : bool, optional + toggles inclusion of bond orders in the path hashes, by default True + countSimulation : bool, optional + if set, use count simulation while generating the fingerprint, by default False + countBounds : _type_, optional + boundaries for count simulation, corresponding bit will be set if the count is higher than the number provided for that spot, by default None + fpSize : int, optional + size 
of the generated fingerprint, does not affect the sparse versions, by default 2048 + numBitsPerFeature : int, optional + the number of bits set per path/subgraph found, by default 2 + """ + self._initializing = True + super().__init__(parallel=parallel, safe_inference_mode=safe_inference_mode) + self.minPath = minPath + self.maxPath = maxPath + self.useHs = useHs + self.branchedPaths = branchedPaths + self.useBondOrder = useBondOrder + self.countSimulation = countSimulation + self.fpSize = fpSize + self.numBitsPerFeature = numBitsPerFeature + self.countBounds = countBounds + + self.useCounts = useCounts + + self._generate_fp_generator() + delattr(self, "_initializing") + + def _transform_mol(self, mol) -> np.array: + if self.useCounts: + return self._fpgen.GetCountFingerprintAsNumPy(mol) + else: + return self._fpgen.GetFingerprintAsNumPy(mol) + + def _generate_fp_generator(self): + self._fpgen = GetRDKitFPGenerator( + minPath=int(self.minPath), + maxPath=int(self.maxPath), + useHs=bool(self.useHs), + branchedPaths=bool(self.branchedPaths), + useBondOrder=bool(self.useBondOrder), + countSimulation=bool(self.countSimulation), + fpSize=int(self.fpSize), + countBounds=bool(self.countBounds), + numBitsPerFeature=int(self.numBitsPerFeature), + ) + + class RDKitFPGeneratorTransformer(FpsGeneratorTransformer): _regenerate_on_properties = ( "minPath", diff --git a/scikit_mol/fingerprints/topologicaltorsion.py b/scikit_mol/fingerprints/topologicaltorsion.py index 63b68bf..f983bcc 100644 --- a/scikit_mol/fingerprints/topologicaltorsion.py +++ b/scikit_mol/fingerprints/topologicaltorsion.py @@ -10,7 +10,7 @@ from rdkit.Chem.rdFingerprintGenerator import GetTopologicalTorsionGenerator -class TopologicalTorsionFingerprintTransformer(FpsTransformer): +class TopologicalTorsionFingerprintTransformerClassic(FpsTransformer): def __init__( self, targetSize: int = 4, @@ -66,6 +66,61 @@ def _mol2fp(self, mol): ) +class TopologicalTorsionFingerprintTransformer(FpsGeneratorTransformer): + 
_regenerate_on_properties = ("fpSize", "includeChirality", "targetSize") + + def __init__( + self, + targetSize: int = 4, + fromAtoms=None, + ignoreAtoms=None, + atomInvariants=None, + confId=-1, + includeChirality: bool = False, + fpSize: int = 2048, + useCounts: bool = False, + parallel: Union[bool, int] = False, + safe_inference_mode: bool = False, + ): + self._initializing = True + super().__init__(parallel=parallel, safe_inference_mode=safe_inference_mode) + self.fpSize = fpSize + self.includeChirality = includeChirality + self.targetSize = targetSize + + self.fromAtoms = fromAtoms + self.ignoreAtoms = ignoreAtoms + self.atomInvariants = atomInvariants + self.confId = confId + self.useCounts = useCounts + + self._generate_fp_generator() + delattr(self, "_initializing") + + def _generate_fp_generator(self): + self._fpgen = GetTopologicalTorsionGenerator( + torsionAtomCount=int(self.targetSize), + includeChirality=bool(self.includeChirality), + fpSize=int(self.fpSize), + ) + + def _transform_mol(self, mol) -> np.array: + if self.useCounts: + return self._fpgen.GetCountFingerprintAsNumPy( + mol, + fromAtoms=self.fromAtoms, + ignoreAtoms=self.ignoreAtoms, + customAtomInvariants=self.atomInvariants, + ) + else: + return self._fpgen.GetFingerprintAsNumPy( + mol, + fromAtoms=self.fromAtoms, + ignoreAtoms=self.ignoreAtoms, + customAtomInvariants=self.atomInvariants, + ) + + class TopologicalTorsionFPGeneatorTransformer(FpsGeneratorTransformer): _regenerate_on_properties = ("fpSize", "includeChirality", "targetSize") diff --git a/tests/fixtures.py b/tests/fixtures.py index c3f392c..1434307 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -202,3 +202,18 @@ def combined_transformer(featurizer): @pytest.fixture def morgan_transformer(): return MorganFingerprintTransformer() + + +@pytest.fixture +def rdkit_transformer(): + return RDKitFingerprintTransformer() + + +@pytest.fixture +def atompair_transformer(): + return AtomPairFingerprintTransformer() + + 
+@pytest.fixture +def topologicaltorsion_transformer(): + return TopologicalTorsionFingerprintTransformer() diff --git a/tests/test_fptransformers.py b/tests/test_fptransformers.py index debf9ef..c977f7c 100644 --- a/tests/test_fptransformers.py +++ b/tests/test_fptransformers.py @@ -18,32 +18,17 @@ from sklearn import clone from scikit_mol.fingerprints import ( - MorganFingerprintTransformer, + # MorganFingerprintTransformer, MACCSKeysFingerprintTransformer, - RDKitFingerprintTransformer, - AtomPairFingerprintTransformer, - TopologicalTorsionFingerprintTransformer, + # RDKitFingerprintTransformer, + # AtomPairFingerprintTransformer, + # TopologicalTorsionFingerprintTransformer, SECFingerprintTransformer, MHFingerprintTransformer, AvalonFingerprintTransformer, ) -@pytest.fixture -def rdkit_transformer(): - return RDKitFingerprintTransformer() - - -@pytest.fixture -def atompair_transformer(): - return AtomPairFingerprintTransformer() - - -@pytest.fixture -def topologicaltorsion_transformer(): - return TopologicalTorsionFingerprintTransformer() - - @pytest.fixture def maccs_transformer(): return MACCSKeysFingerprintTransformer() @@ -84,9 +69,9 @@ def avalon_transformer(): def test_clonability( maccs_transformer, # morgan_transformer, - rdkit_transformer, - atompair_transformer, - topologicaltorsion_transformer, + # rdkit_transformer, + # atompair_transformer, + # topologicaltorsion_transformer, secfp_transformer, mhfp_transformer, avalon_transformer, @@ -94,9 +79,9 @@ def test_clonability( for t in [ maccs_transformer, # morgan_transformer, - rdkit_transformer, - atompair_transformer, - topologicaltorsion_transformer, + # rdkit_transformer, + # atompair_transformer, + # topologicaltorsion_transformer, secfp_transformer, mhfp_transformer, avalon_transformer, @@ -112,17 +97,17 @@ def test_clonability( def test_set_params( # morgan_transformer, - rdkit_transformer, - atompair_transformer, - topologicaltorsion_transformer, + # rdkit_transformer, + # atompair_transformer, 
+ # topologicaltorsion_transformer, secfp_transformer, mhfp_transformer, avalon_transformer, ): for t in [ # morgan_transformer, - atompair_transformer, - topologicaltorsion_transformer, + # atompair_transformer, + # topologicaltorsion_transformer, avalon_transformer, ]: params = t.get_params() @@ -134,7 +119,7 @@ def test_set_params( params_2 = t.get_params() assert all([params[key] == params_2[key] for key in params.keys()]) - for t in [rdkit_transformer, secfp_transformer, mhfp_transformer]: + for t in [secfp_transformer, mhfp_transformer]: params = t.get_params() params["fpSize"] = 4242 t.set_params(fpSize=4242) @@ -145,9 +130,9 @@ def test_set_params( def test_transform( mols_container, # morgan_transformer, - rdkit_transformer, - atompair_transformer, - topologicaltorsion_transformer, + # rdkit_transformer, + # atompair_transformer, + # topologicaltorsion_transformer, maccs_transformer, secfp_transformer, mhfp_transformer, @@ -156,10 +141,10 @@ def test_transform( # Test the different transformers for t in [ # morgan_transformer, - atompair_transformer, - topologicaltorsion_transformer, + # atompair_transformer, + # topologicaltorsion_transformer, maccs_transformer, - rdkit_transformer, + # rdkit_transformer, secfp_transformer, mhfp_transformer, avalon_transformer, @@ -179,9 +164,9 @@ def test_transform( def test_transform_parallel( mols_container, # morgan_transformer, - rdkit_transformer, - atompair_transformer, - topologicaltorsion_transformer, + # rdkit_transformer, + # atompair_transformer, + # topologicaltorsion_transformer, maccs_transformer, secfp_transformer, mhfp_transformer, @@ -190,10 +175,10 @@ def test_transform_parallel( # Test the different transformers for t in [ # morgan_transformer, - atompair_transformer, - topologicaltorsion_transformer, + # atompair_transformer, + # topologicaltorsion_transformer, maccs_transformer, - rdkit_transformer, + # rdkit_transformer, secfp_transformer, mhfp_transformer, avalon_transformer, @@ -212,9 +197,9 @@ 
def test_transform_parallel( def test_picklable( # morgan_transformer, - rdkit_transformer, - atompair_transformer, - topologicaltorsion_transformer, + # rdkit_transformer, + # atompair_transformer, + # topologicaltorsion_transformer, maccs_transformer, secfp_transformer, avalon_transformer, @@ -222,10 +207,10 @@ def test_picklable( # Test the different transformers for t in [ # morgan_transformer, - atompair_transformer, - topologicaltorsion_transformer, + # atompair_transformer, + # topologicaltorsion_transformer, maccs_transformer, - rdkit_transformer, + # rdkit_transformer, secfp_transformer, avalon_transformer, ]: @@ -269,74 +254,6 @@ def assert_transformer_set_params(tr_class, new_params, mols_list): ), f"Assertation error, FP appears to be different, although the {key} should be changed back as well as initialized to {params[key]}" -def test_morgan_set_params(chiral_mols_list): - new_params = { - "fpSize": 1024, - "radius": 1, - "useBondTypes": False, # TODO, why doesn't this change the FP? 
- "useChirality": True, - "useCounts": True, - "useFeatures": True, - } - - assert_transformer_set_params( - MorganFingerprintTransformer, new_params, chiral_mols_list - ) - - -def test_atompairs_set_params(chiral_mols_list): - new_params = { - #'atomInvariants': 1, - #'confId': -1, - #'fromAtoms': 1, - #'ignoreAtoms': 0, - "includeChirality": True, - "maxLength": 3, - "minLength": 3, - "fpSize": 1024, - "nBitsPerEntry": 3, - #'use2D': True, #TODO, understand why this can't be set different - "useCounts": True, - } - - assert_transformer_set_params( - AtomPairFingerprintTransformer, new_params, chiral_mols_list - ) - - -def test_topologicaltorsion_set_params(chiral_mols_list): - new_params = { #'atomInvariants': 0, - #'fromAtoms': 0, - #'ignoreAtoms': 0, - #'includeChirality': True, #TODO, figure out why this setting seems to give same FP wheter toggled or not - "fpSize": 1024, - "nBitsPerEntry": 3, - "targetSize": 5, - "useCounts": True, - } - - assert_transformer_set_params( - TopologicalTorsionFingerprintTransformer, new_params, chiral_mols_list - ) - - -def test_RDKitFPTransformer(chiral_mols_list): - new_params = { #'atomInvariantsGenerator': None, - #'branchedPaths': False, - #'countBounds': 0, #TODO: What does this do? - "countSimulation": True, - "fpSize": 1024, - "maxPath": 3, - "minPath": 2, - "numBitsPerFeature": 3, - "useBondOrder": False, # TODO, why doesn't this change the FP? - #'useHs': False, #TODO, why doesn't this change the FP? 
- } - assert_transformer_set_params( - RDKitFingerprintTransformer, new_params, chiral_mols_list - ) - - def test_SECFingerprintTransformer(chiral_mols_list): new_params = { "isomeric": True, @@ -383,19 +300,19 @@ def test_AvalonFingerprintTransformer(chiral_mols_list): def test_transform_with_safe_inference_mode( mols_with_invalid_container, # morgan_transformer, - rdkit_transformer, - atompair_transformer, - topologicaltorsion_transformer, + # rdkit_transformer, + # atompair_transformer, + # topologicaltorsion_transformer, maccs_transformer, secfp_transformer, avalon_transformer, ): for t in [ # morgan_transformer, - atompair_transformer, - topologicaltorsion_transformer, + # atompair_transformer, + # topologicaltorsion_transformer, maccs_transformer, - rdkit_transformer, + # rdkit_transformer, secfp_transformer, avalon_transformer, ]: @@ -415,9 +332,9 @@ def test_transform_with_safe_inference_mode( def test_transform_without_safe_inference_mode( mols_with_invalid_container, # morgan_transformer, - rdkit_transformer, - atompair_transformer, - topologicaltorsion_transformer, + # rdkit_transformer, + # atompair_transformer, + # topologicaltorsion_transformer, maccs_transformer, secfp_transformer, avalon_transformer, @@ -425,10 +342,10 @@ def test_transform_without_safe_inference_mode( ): for t in [ # morgan_transformer, - atompair_transformer, - topologicaltorsion_transformer, + # atompair_transformer, + # topologicaltorsion_transformer, maccs_transformer, - rdkit_transformer, + # rdkit_transformer, secfp_transformer, avalon_transformer, ]: @@ -444,19 +361,19 @@ def test_transform_without_safe_inference_mode( def test_transform_parallel_with_safe_inference_mode( mols_with_invalid_container, # morgan_transformer, - rdkit_transformer, - atompair_transformer, - topologicaltorsion_transformer, + # rdkit_transformer, + # atompair_transformer, + # topologicaltorsion_transformer, maccs_transformer, secfp_transformer, avalon_transformer, ): for t in [ - # 
morgan_transformer, - atompair_transformer, - topologicaltorsion_transformer, + # # morgan_transformer, + # atompair_transformer, + # topologicaltorsion_transformer, maccs_transformer, - rdkit_transformer, + # rdkit_transformer, secfp_transformer, avalon_transformer, ]: diff --git a/tests/test_fptransformersgenerator.py b/tests/test_fptransformersgenerator.py index f35771b..aa7a426 100644 --- a/tests/test_fptransformersgenerator.py +++ b/tests/test_fptransformersgenerator.py @@ -14,30 +14,20 @@ from sklearn import clone from scikit_mol.fingerprints import ( - MorganFPGeneratorTransformer, + AtomPairFingerprintTransformer, MorganFingerprintTransformer, - RDKitFPGeneratorTransformer, - AtomPairFPGeneratorTransformer, - TopologicalTorsionFPGeneatorTransformer, + RDKitFingerprintTransformer, + TopologicalTorsionFingerprintTransformer, ) test_transformers = [ + AtomPairFingerprintTransformer, MorganFingerprintTransformer, - MorganFPGeneratorTransformer, - RDKitFPGeneratorTransformer, - AtomPairFPGeneratorTransformer, - TopologicalTorsionFPGeneatorTransformer, + RDKitFingerprintTransformer, + TopologicalTorsionFingerprintTransformer, ] -# @pytest.mark.parametrize("transformer_class", test_transformers) -# def test_fpstransformer_fp2array(transformer_class, fingerprint): -# transformer = transformer_class() - -# with pytest.raises(DeprecationWarning, match='Generators can directly return fingerprints'): -# fp = transformer._fp2array(fingerprint) - - @pytest.mark.parametrize("transformer_class", test_transformers) def test_fpstransformer_transform_mol(transformer_class, mols_list): transformer = transformer_class() @@ -47,14 +37,12 @@ def test_fpstransformer_transform_mol(transformer_class, mols_list): assert type(fp) == type(np.array([0])) assert fp.shape == (2048,) - if isinstance(transformer, RDKitFPGeneratorTransformer): + if isinstance(transformer, RDKitFingerprintTransformer): assert fp.sum() == 104 - elif isinstance(transformer, AtomPairFPGeneratorTransformer): + 
elif isinstance(transformer, AtomPairFingerprintTransformer): assert fp.sum() == 32 - elif isinstance(transformer, TopologicalTorsionFPGeneatorTransformer): + elif isinstance(transformer, TopologicalTorsionFingerprintTransformer): assert fp.sum() == 12 - elif isinstance(transformer, MorganFPGeneratorTransformer): - assert fp.sum() == 14 elif isinstance(transformer, MorganFingerprintTransformer): assert fp.sum() == 14 else: @@ -177,7 +165,7 @@ def test_morgan_set_params(chiral_mols_list): } assert_transformer_set_params( - MorganFPGeneratorTransformer, new_params, chiral_mols_list + MorganFingerprintTransformer, new_params, chiral_mols_list ) @@ -191,13 +179,13 @@ def test_atompairs_set_params(chiral_mols_list): "maxLength": 3, "minLength": 3, "fpSize": 1024, - #'nBitsPerEntry': 3, #Todo: not setable with the generators? + #'nBitsPerEntry': 3, #TODO: seem deprecated with the generators? #'use2D': True, #TODO, understand why this can't be set different "useCounts": True, } assert_transformer_set_params( - AtomPairFPGeneratorTransformer, new_params, chiral_mols_list + AtomPairFingerprintTransformer, new_params, chiral_mols_list ) @@ -213,7 +201,7 @@ def test_topologicaltorsion_set_params(chiral_mols_list): } assert_transformer_set_params( - TopologicalTorsionFPGeneatorTransformer, new_params, chiral_mols_list + TopologicalTorsionFingerprintTransformer, new_params, chiral_mols_list ) @@ -230,5 +218,5 @@ def test_RDKitFPTransformer(chiral_mols_list): #'useHs': False, #TODO, why doesn't this change the FP? 
} assert_transformer_set_params( - RDKitFPGeneratorTransformer, new_params, chiral_mols_list + RDKitFingerprintTransformer, new_params, chiral_mols_list ) diff --git a/tests/test_parameter_types.py b/tests/test_parameter_types.py index ceea00c..4b73959 100644 --- a/tests/test_parameter_types.py +++ b/tests/test_parameter_types.py @@ -1,11 +1,15 @@ import pytest import numpy as np from rdkit import Chem -from fixtures import mols_list, smiles_list, morgan_transformer -from test_fptransformers import ( +from fixtures import ( + mols_list, + smiles_list, + morgan_transformer, atompair_transformer, topologicaltorsion_transformer, rdkit_transformer, +) +from test_fptransformers import ( avalon_transformer, ) From 0b0f0fc0422c93e7a6c5ad572cf423d9aa783a36 Mon Sep 17 00:00:00 2001 From: Esben Jannik Bjerrum Date: Sun, 24 Nov 2024 09:41:34 +0100 Subject: [PATCH 21/24] updated gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 6996e69..5d2fa4a 100644 --- a/.gitignore +++ b/.gitignore @@ -138,6 +138,6 @@ tests/data/ # setuptools_scm version scikit_mol/_version.py -notebooks/sandbox.py .vscode notebooks/SLC6A4_active_excape_export.csv +sandbox/ From 570119271ae04d2eec1a987b9262c57a974ef726 Mon Sep 17 00:00:00 2001 From: Esben Jannik Bjerrum Date: Sun, 24 Nov 2024 10:02:02 +0100 Subject: [PATCH 22/24] Fixed some issues that created deprecation warnings. 
All tests passes, and only warnings from dependencies --- scikit_mol/fingerprints/baseclasses.py | 23 +++---- scikit_mol/fingerprints/minhash.py | 16 +++-- tests/test_fptransformers.py | 88 +------------------------- 3 files changed, 22 insertions(+), 105 deletions(-) diff --git a/scikit_mol/fingerprints/baseclasses.py b/scikit_mol/fingerprints/baseclasses.py index 30bfc5c..e28fa07 100644 --- a/scikit_mol/fingerprints/baseclasses.py +++ b/scikit_mol/fingerprints/baseclasses.py @@ -132,19 +132,9 @@ def fit(self, X, y=None): @check_transform_input def _transform(self, X): if self.safe_inference_mode: - # Use the new method with masked arrays if we're in safe inference mode arrays = [self._safe_transform_mol(mol) for mol in X] return np.ma.stack(arrays) - elif hasattr( - self, "dtype" - ): # TODO, it seems a bit of a code smell that we have to preemptively test a property from the baseclass? - # Use the original, faster method if we're not in safe inference mode - # This also triggers a deprecation warning! 
- arr = np.zeros((len(X), self.fpSize), dtype=self.dtype) - for i, mol in enumerate(X): - arr[i, :] = self._transform_mol(mol) - return arr - else: # We are unsure on the dtype, so we don't use a preassigned array #TODO test time differnece to previous + else: arrays = [self._transform_mol(mol) for mol in X] return np.stack(arrays) @@ -237,6 +227,17 @@ def _fp2array(self, fp): else: return np.ma.masked_all((self.fpSize,), dtype=self.dtype) + @check_transform_input + def _transform(self, X): + if self.safe_inference_mode: + arrays = [self._safe_transform_mol(mol) for mol in X] + return np.ma.stack(arrays) + else: + arr = np.zeros((len(X), self.fpSize), dtype=self.dtype) + for i, mol in enumerate(X): + arr[i, :] = self._transform_mol(mol) + return arr + # TODO, remove when finally deprecating nBits def _get_param_names(self): """Get parameter names excluding deprecated parameters""" diff --git a/scikit_mol/fingerprints/minhash.py b/scikit_mol/fingerprints/minhash.py index 9d0ec31..e487739 100644 --- a/scikit_mol/fingerprints/minhash.py +++ b/scikit_mol/fingerprints/minhash.py @@ -75,7 +75,9 @@ def _fp2array(self, fp): return np.array(fp) def _recreate_encoder(self): - self.mhfp_encoder = rdMHFPFingerprint.MHFPEncoder(self.fpSize, self._seed) + self.mhfp_encoder = rdMHFPFingerprint.MHFPEncoder( + int(self.fpSize), int(self._seed) + ) @property def seed(self): @@ -166,12 +168,12 @@ def __setstate__(self, state): def _mol2fp(self, mol): return self.mhfp_encoder.EncodeSECFPMol( mol, - self.radius, - self.rings, - self.isomeric, - self.kekulize, - self.min_radius, - self.length, + int(self.radius), + bool(self.rings), + bool(self.isomeric), + bool(self.kekulize), + int(self.min_radius), + int(self.fpSize), ) def _recreate_encoder(self): diff --git a/tests/test_fptransformers.py b/tests/test_fptransformers.py index c977f7c..aa3ae3d 100644 --- a/tests/test_fptransformers.py +++ b/tests/test_fptransformers.py @@ -18,11 +18,7 @@ from sklearn import clone from 
scikit_mol.fingerprints import ( - # MorganFingerprintTransformer, MACCSKeysFingerprintTransformer, - # RDKitFingerprintTransformer, - # AtomPairFingerprintTransformer, - # TopologicalTorsionFingerprintTransformer, SECFingerprintTransformer, MHFingerprintTransformer, AvalonFingerprintTransformer, @@ -49,39 +45,14 @@ def avalon_transformer(): return AvalonFingerprintTransformer() -# morgan is no longer a fptransformer but a generator transformer, but why was this the only one to be tested here? -# def test_fpstransformer_fp2array(morgan_transformer, fingerprint): -# fp = morgan_transformer._fp2array(fingerprint) -# # See that fp is the correct type, shape and bit count -# assert type(fp) == type(np.array([0])) -# assert fp.shape == (1000,) -# assert fp.sum() == 25 - - -# def test_fpstransformer_transform_mol(morgan_transformer, mols_list): -# fp = morgan_transformer._transform_mol(mols_list[0]) -# # See that fp is the correct type, shape and bit count -# assert type(fp) == type(np.array([0])) -# assert fp.shape == (2048,) -# assert fp.sum() == 14 - - def test_clonability( maccs_transformer, - # morgan_transformer, - # rdkit_transformer, - # atompair_transformer, - # topologicaltorsion_transformer, secfp_transformer, mhfp_transformer, avalon_transformer, ): for t in [ maccs_transformer, - # morgan_transformer, - # rdkit_transformer, - # atompair_transformer, - # topologicaltorsion_transformer, secfp_transformer, mhfp_transformer, avalon_transformer, @@ -96,20 +67,11 @@ def test_clonability( def test_set_params( - # morgan_transformer, - # rdkit_transformer, - # atompair_transformer, - # topologicaltorsion_transformer, secfp_transformer, mhfp_transformer, avalon_transformer, ): - for t in [ - # morgan_transformer, - # atompair_transformer, - # topologicaltorsion_transformer, - avalon_transformer, - ]: + for t in [avalon_transformer]: params = t.get_params() # change extracted dictionary params["fpSize"] = 4242 @@ -129,10 +91,6 @@ def test_set_params( def 
test_transform( mols_container, - # morgan_transformer, - # rdkit_transformer, - # atompair_transformer, - # topologicaltorsion_transformer, maccs_transformer, secfp_transformer, mhfp_transformer, @@ -140,11 +98,7 @@ def test_transform( ): # Test the different transformers for t in [ - # morgan_transformer, - # atompair_transformer, - # topologicaltorsion_transformer, maccs_transformer, - # rdkit_transformer, secfp_transformer, mhfp_transformer, avalon_transformer, @@ -163,10 +117,6 @@ def test_transform( def test_transform_parallel( mols_container, - # morgan_transformer, - # rdkit_transformer, - # atompair_transformer, - # topologicaltorsion_transformer, maccs_transformer, secfp_transformer, mhfp_transformer, @@ -174,11 +124,7 @@ def test_transform_parallel( ): # Test the different transformers for t in [ - # morgan_transformer, - # atompair_transformer, - # topologicaltorsion_transformer, maccs_transformer, - # rdkit_transformer, secfp_transformer, mhfp_transformer, avalon_transformer, @@ -196,21 +142,13 @@ def test_transform_parallel( def test_picklable( - # morgan_transformer, - # rdkit_transformer, - # atompair_transformer, - # topologicaltorsion_transformer, maccs_transformer, secfp_transformer, avalon_transformer, ): # Test the different transformers for t in [ - # morgan_transformer, - # atompair_transformer, - # topologicaltorsion_transformer, maccs_transformer, - # rdkit_transformer, secfp_transformer, avalon_transformer, ]: @@ -299,20 +237,12 @@ def test_AvalonFingerprintTransformer(chiral_mols_list): def test_transform_with_safe_inference_mode( mols_with_invalid_container, - # morgan_transformer, - # rdkit_transformer, - # atompair_transformer, - # topologicaltorsion_transformer, maccs_transformer, secfp_transformer, avalon_transformer, ): for t in [ - # morgan_transformer, - # atompair_transformer, - # topologicaltorsion_transformer, maccs_transformer, - # rdkit_transformer, secfp_transformer, avalon_transformer, ]: @@ -331,21 +261,13 @@ def 
test_transform_with_safe_inference_mode( def test_transform_without_safe_inference_mode( mols_with_invalid_container, - # morgan_transformer, - # rdkit_transformer, - # atompair_transformer, - # topologicaltorsion_transformer, maccs_transformer, secfp_transformer, avalon_transformer, # MHFP seem to accept invalid mols and return 0,0,0,0's ): for t in [ - # morgan_transformer, - # atompair_transformer, - # topologicaltorsion_transformer, maccs_transformer, - # rdkit_transformer, secfp_transformer, avalon_transformer, ]: @@ -360,20 +282,12 @@ def test_transform_without_safe_inference_mode( # Add this test to check parallel processing with error handling def test_transform_parallel_with_safe_inference_mode( mols_with_invalid_container, - # morgan_transformer, - # rdkit_transformer, - # atompair_transformer, - # topologicaltorsion_transformer, maccs_transformer, secfp_transformer, avalon_transformer, ): for t in [ - # # morgan_transformer, - # atompair_transformer, - # topologicaltorsion_transformer, maccs_transformer, - # rdkit_transformer, secfp_transformer, avalon_transformer, ]: From 35cfdc14fe5efd85a7fbf94941884d04685b4b0b Mon Sep 17 00:00:00 2001 From: Esben Jannik Bjerrum Date: Sun, 24 Nov 2024 10:06:41 +0100 Subject: [PATCH 23/24] Deleted the generator and classic temporary classes and updated tests --- scikit_mol/fingerprints/__init__.py | 7 +- scikit_mol/fingerprints/atompair.py | 134 -------------- scikit_mol/fingerprints/morgan.py | 145 --------------- scikit_mol/fingerprints/rdkitfp.py | 165 ------------------ scikit_mol/fingerprints/topologicaltorsion.py | 111 ------------ tests/test_transformers.py | 37 ---- 6 files changed, 3 insertions(+), 596 deletions(-) diff --git a/scikit_mol/fingerprints/__init__.py b/scikit_mol/fingerprints/__init__.py index 5ed655d..c0b4cb7 100644 --- a/scikit_mol/fingerprints/__init__.py +++ b/scikit_mol/fingerprints/__init__.py @@ -3,13 +3,12 @@ FpsGeneratorTransformer, ) # TODO, for backwards compatibility with tests, needs 
to be removed -from .atompair import AtomPairFingerprintTransformer, AtomPairFPGeneratorTransformer +from .atompair import AtomPairFingerprintTransformer from .avalon import AvalonFingerprintTransformer from .maccs import MACCSKeysFingerprintTransformer from .minhash import MHFingerprintTransformer, SECFingerprintTransformer -from .morgan import MorganFingerprintTransformer, MorganFPGeneratorTransformer -from .rdkitfp import RDKitFingerprintTransformer, RDKitFPGeneratorTransformer +from .morgan import MorganFingerprintTransformer +from .rdkitfp import RDKitFingerprintTransformer from .topologicaltorsion import ( TopologicalTorsionFingerprintTransformer, - TopologicalTorsionFPGeneatorTransformer, ) diff --git a/scikit_mol/fingerprints/atompair.py b/scikit_mol/fingerprints/atompair.py index f468575..ded1f18 100644 --- a/scikit_mol/fingerprints/atompair.py +++ b/scikit_mol/fingerprints/atompair.py @@ -10,74 +10,6 @@ from rdkit.Chem import rdMolDescriptors -class AtomPairFingerprintTransformerClassic(FpsTransformer): - def __init__( - self, - minLength: int = 1, - maxLength: int = 30, - fromAtoms=0, - ignoreAtoms=0, - atomInvariants=0, - nBitsPerEntry: int = 4, - includeChirality: bool = False, - use2D: bool = True, - confId: int = -1, - fpSize=2048, - useCounts: bool = False, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - dtype: np.dtype = np.int8, - ): - super().__init__( - parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype - ) - self.minLength = minLength - self.maxLength = maxLength - self.fromAtoms = fromAtoms - self.ignoreAtoms = ignoreAtoms - self.atomInvariants = atomInvariants - self.includeChirality = includeChirality - self.use2D = use2D - self.confId = confId - self.fpSize = fpSize - self.nBitsPerEntry = nBitsPerEntry - self.useCounts = useCounts - - warn( - "AtomPairFingerprintTransformer will be replace by AtomPairFPGeneratorTransformer, due to changes in RDKit!", - DeprecationWarning, - ) - - def 
_mol2fp(self, mol): - if self.useCounts: - return rdMolDescriptors.GetHashedAtomPairFingerprint( - mol, - nBits=int(self.fpSize), - minLength=int(self.minLength), - maxLength=int(self.maxLength), - fromAtoms=self.fromAtoms, - ignoreAtoms=self.ignoreAtoms, - atomInvariants=self.atomInvariants, - includeChirality=bool(self.includeChirality), - use2D=bool(self.use2D), - confId=int(self.confId), - ) - else: - return rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect( - mol, - nBits=int(self.fpSize), - minLength=int(self.minLength), - maxLength=int(self.maxLength), - fromAtoms=self.fromAtoms, - ignoreAtoms=self.ignoreAtoms, - atomInvariants=self.atomInvariants, - nBitsPerEntry=int(self.nBitsPerEntry), - includeChirality=bool(self.includeChirality), - use2D=bool(self.use2D), - confId=int(self.confId), - ) - - class AtomPairFingerprintTransformer(FpsGeneratorTransformer): _regenerate_on_properties = ( "fpSize", @@ -142,69 +74,3 @@ def _transform_mol(self, mol) -> np.array: ignoreAtoms=self.ignoreAtoms, customAtomInvariants=self.atomInvariants, ) - - -class AtomPairFPGeneratorTransformer(FpsGeneratorTransformer): - _regenerate_on_properties = ( - "fpSize", - "includeChirality", - "use2D", - "minLength", - "maxLength", - ) - - def __init__( - self, - minLength: int = 1, - maxLength: int = 30, - fromAtoms=None, - ignoreAtoms=None, - atomInvariants=None, - includeChirality: bool = False, - use2D: bool = True, - confId: int = -1, - fpSize: int = 2048, - useCounts: bool = False, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - ): - self._initializing = True - super().__init__(parallel=parallel, safe_inference_mode=safe_inference_mode) - self.fpSize = fpSize - self.use2D = use2D - self.includeChirality = includeChirality - self.minLength = minLength - self.maxLength = maxLength - - self.useCounts = useCounts - self.confId = confId - self.fromAtoms = fromAtoms - self.ignoreAtoms = ignoreAtoms - self.atomInvariants = atomInvariants - 
self._generate_fp_generator() - delattr(self, "_initializing") - - def _generate_fp_generator(self): - self._fpgen = GetAtomPairGenerator( - minDistance=self.minLength, - maxDistance=self.maxLength, - includeChirality=self.includeChirality, - use2D=self.use2D, - fpSize=self.fpSize, - ) - - def _transform_mol(self, mol) -> np.array: - if self.useCounts: - return self._fpgen.GetCountFingerprintAsNumPy( - mol, - fromAtoms=self.fromAtoms, - ignoreAtoms=self.ignoreAtoms, - customAtomInvariants=self.atomInvariants, - ) - else: - return self._fpgen.GetFingerprintAsNumPy( - mol, - fromAtoms=self.fromAtoms, - ignoreAtoms=self.ignoreAtoms, - customAtomInvariants=self.atomInvariants, - ) diff --git a/scikit_mol/fingerprints/morgan.py b/scikit_mol/fingerprints/morgan.py index 6ddb6b9..f2b1edf 100644 --- a/scikit_mol/fingerprints/morgan.py +++ b/scikit_mol/fingerprints/morgan.py @@ -14,72 +14,6 @@ from .baseclasses import FpsTransformer, FpsGeneratorTransformer -class MorganFingerprintTransformerClassic(FpsTransformer): - def __init__( - self, - fpSize=2048, - radius=2, - useChirality=False, - useBondTypes=True, - useFeatures=False, - useCounts=False, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - dtype: np.dtype = np.int8, - ): - """Transform RDKit mols into Count or bit-based hashed MorganFingerprints - - Parameters - ---------- - fpSize : int, optional - Size of the hashed fingerprint, by default 2048 - radius : int, optional - Radius of the fingerprint, by default 2 - useChirality : bool, optional - Include chirality in calculation of the fingerprint keys, by default False - useBondTypes : bool, optional - Include bondtypes in calculation of the fingerprint keys, by default True - useFeatures : bool, optional - use chemical features, rather than atom-type in calculation of the fingerprint keys, by default False - useCounts : bool, optional - If toggled will create the count and not bit-based fingerprint, by default False - """ - 
super().__init__( - parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype - ) - self.fpSize = fpSize - self.radius = radius - self.useChirality = useChirality - self.useBondTypes = useBondTypes - self.useFeatures = useFeatures - self.useCounts = useCounts - - warn( - "MorganFingerprintTransformer will be replace by MorganGeneratorTransformer, due to changes in RDKit!", - DeprecationWarning, - ) - - def _mol2fp(self, mol): - if self.useCounts: - return rdMolDescriptors.GetHashedMorganFingerprint( - mol, - int(self.radius), - nBits=int(self.fpSize), - useFeatures=bool(self.useFeatures), - useChirality=bool(self.useChirality), - useBondTypes=bool(self.useBondTypes), - ) - else: - return rdMolDescriptors.GetMorganFingerprintAsBitVect( - mol, - int(self.radius), - nBits=int(self.fpSize), - useFeatures=bool(self.useFeatures), - useChirality=bool(self.useChirality), - useBondTypes=bool(self.useBondTypes), - ) - - class MorganFingerprintTransformer(FpsGeneratorTransformer): _regenerate_on_properties = ( "radius", @@ -157,82 +91,3 @@ def _transform_mol(self, mol) -> np.array: return self._fpgen.GetCountFingerprintAsNumPy(mol) else: return self._fpgen.GetFingerprintAsNumPy(mol) - - -class MorganFPGeneratorTransformer(FpsGeneratorTransformer): - _regenerate_on_properties = ( - "radius", - "fpSize", - "useChirality", - "useFeatures", - "useBondTypes", - ) - - def __init__( - self, - fpSize=2048, - radius=2, - useChirality=False, - useBondTypes=True, - useFeatures=False, - useCounts=False, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - dtype: np.dtype = None, - nBits: int = None, - ): - """Transform RDKit mols into Count or bit-based hashed MorganFingerprints - - Parameters - ---------- - fpsize : int, optional - Size of the hashed fingerprint, by default 2048 - radius : int, optional - Radius of the fingerprint, by default 2 - useChirality : bool, optional - Include chirality in calculation of the fingerprint keys, by default 
False - useBondTypes : bool, optional - Include bondtypes in calculation of the fingerprint keys, by default True - useFeatures : bool, optional - use chemical features, rather than atom-type in calculation of the fingerprint keys, by default False - useCounts : bool, optional - If toggled will create the count and not bit-based fingerprint, by default False - parallel : bool or int, optional - If True, will use all available cores, if int will use that many cores, by default False - safe_inference_mode : bool, optional - If True, will return masked arrays for invalid mols, by default False - """ - - self._initializing = True - super().__init__(parallel=parallel, safe_inference_mode=safe_inference_mode) - self.fpSize = fpSize - self.radius = radius - self.useChirality = useChirality - self.useFeatures = useFeatures - self.useCounts = useCounts - self.useBondTypes = useBondTypes - self.dtype = dtype - self.nBits = nBits - - self._generate_fp_generator() - delattr(self, "_initializing") - - def _generate_fp_generator(self): - if self.useFeatures: - atomInvariantsGenerator = GetMorganFeatureAtomInvGen() - else: - atomInvariantsGenerator = None - - self._fpgen = GetMorganGenerator( - radius=int(self.radius), - fpSize=int(self.fpSize), - includeChirality=bool(self.useChirality), - useBondTypes=bool(self.useBondTypes), - atomInvariantsGenerator=atomInvariantsGenerator, - ) - - def _transform_mol(self, mol) -> np.array: - if self.useCounts: - return self._fpgen.GetCountFingerprintAsNumPy(mol) - else: - return self._fpgen.GetFingerprintAsNumPy(mol) diff --git a/scikit_mol/fingerprints/rdkitfp.py b/scikit_mol/fingerprints/rdkitfp.py index 13d9a27..19a8d2e 100644 --- a/scikit_mol/fingerprints/rdkitfp.py +++ b/scikit_mol/fingerprints/rdkitfp.py @@ -11,83 +11,6 @@ from rdkit.Chem import rdFingerprintGenerator -class RDKitFingerprintTransformerClassic(FpsTransformer): - def __init__( - self, - minPath: int = 1, - maxPath: int = 7, - useHs: bool = True, - branchedPaths: bool = 
True, - useBondOrder: bool = True, - countSimulation: bool = False, - countBounds=None, - fpSize: int = 2048, - numBitsPerFeature: int = 2, - atomInvariantsGenerator=None, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - dtype: np.dtype = np.int8, - ): - """Calculates the RDKit fingerprints - - Parameters - ---------- - minPath : int, optional - the minimum path length (in bonds) to be included, by default 1 - maxPath : int, optional - the maximum path length (in bonds) to be included, by default 7 - useHs : bool, optional - toggles inclusion of Hs in paths (if the molecule has explicit Hs), by default True - branchedPaths : bool, optional - toggles generation of branched subgraphs, not just linear paths, by default True - useBondOrder : bool, optional - toggles inclusion of bond orders in the path hashes, by default True - countSimulation : bool, optional - if set, use count simulation while generating the fingerprint, by default False - countBounds : _type_, optional - boundaries for count simulation, corresponding bit will be set if the count is higher than the number provided for that spot, by default None - fpSize : int, optional - size of the generated fingerprint, does not affect the sparse versions, by default 2048 - numBitsPerFeature : int, optional - the number of bits set per path/subgraph found, by default 2 - atomInvariantsGenerator : _type_, optional - atom invariants to be used during fingerprint generation, by default None - """ - super().__init__( - parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype - ) - self.minPath = minPath - self.maxPath = maxPath - self.useHs = useHs - self.branchedPaths = branchedPaths - self.useBondOrder = useBondOrder - self.countSimulation = countSimulation - self.countBounds = countBounds - self.fpSize = fpSize - self.numBitsPerFeature = numBitsPerFeature - self.atomInvariantsGenerator = atomInvariantsGenerator - - warn( - "RDKitFingerprintTransformer will be replace by 
RDKitFPGeneratorTransformer, due to changes in RDKit!", - DeprecationWarning, - ) - - def _mol2fp(self, mol): - generator = rdFingerprintGenerator.GetRDKitFPGenerator( - minPath=int(self.minPath), - maxPath=int(self.maxPath), - useHs=bool(self.useHs), - branchedPaths=bool(self.branchedPaths), - useBondOrder=bool(self.useBondOrder), - countSimulation=bool(self.countSimulation), - countBounds=bool(self.countBounds), - fpSize=int(self.fpSize), - numBitsPerFeature=int(self.numBitsPerFeature), - atomInvariantsGenerator=self.atomInvariantsGenerator, - ) - return generator.GetFingerprint(mol) - - class RDKitFingerprintTransformer(FpsGeneratorTransformer): _regenerate_on_properties = ( "minPath", @@ -174,91 +97,3 @@ def _generate_fp_generator(self): countBounds=bool(self.countBounds), numBitsPerFeature=int(self.numBitsPerFeature), ) - - -class RDKitFPGeneratorTransformer(FpsGeneratorTransformer): - _regenerate_on_properties = ( - "minPath", - "maxPath", - "useHs", - "branchedPaths", - "useBondOrder", - "countSimulation", - "fpSize", - "countBounds", - "numBitsPerFeature", - ) - - def __init__( - self, - minPath: int = 1, - maxPath: int = 7, - useHs: bool = True, - branchedPaths: bool = True, - useBondOrder: bool = True, - countSimulation: bool = False, - countBounds=None, - fpSize: int = 2048, - numBitsPerFeature: int = 2, - useCounts: bool = False, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - ): - """Calculates the RDKit fingerprints - - Parameters - ---------- - minPath : int, optional - the minimum path length (in bonds) to be included, by default 1 - maxPath : int, optional - the maximum path length (in bonds) to be included, by default 7 - useHs : bool, optional - toggles inclusion of Hs in paths (if the molecule has explicit Hs), by default True - branchedPaths : bool, optional - toggles generation of branched subgraphs, not just linear paths, by default True - useBondOrder : bool, optional - toggles inclusion of bond orders in the 
path hashes, by default True - countSimulation : bool, optional - if set, use count simulation while generating the fingerprint, by default False - countBounds : _type_, optional - boundaries for count simulation, corresponding bit will be set if the count is higher than the number provided for that spot, by default None - fpSize : int, optional - size of the generated fingerprint, does not affect the sparse versions, by default 2048 - numBitsPerFeature : int, optional - the number of bits set per path/subgraph found, by default 2 - """ - self._initializing = True - super().__init__(parallel=parallel, safe_inference_mode=safe_inference_mode) - self.minPath = minPath - self.maxPath = maxPath - self.useHs = useHs - self.branchedPaths = branchedPaths - self.useBondOrder = useBondOrder - self.countSimulation = countSimulation - self.fpSize = fpSize - self.numBitsPerFeature = numBitsPerFeature - self.countBounds = countBounds - - self.useCounts = useCounts - - self._generate_fp_generator() - delattr(self, "_initializing") - - def _transform_mol(self, mol) -> np.array: - if self.useCounts: - return self._fpgen.GetCountFingerprintAsNumPy(mol) - else: - return self._fpgen.GetFingerprintAsNumPy(mol) - - def _generate_fp_generator(self): - self._fpgen = GetRDKitFPGenerator( - minPath=self.minPath, - maxPath=self.maxPath, - useHs=self.useHs, - branchedPaths=self.branchedPaths, - useBondOrder=self.useBondOrder, - countSimulation=self.countSimulation, - fpSize=self.fpSize, - countBounds=self.countBounds, - numBitsPerFeature=self.numBitsPerFeature, - ) diff --git a/scikit_mol/fingerprints/topologicaltorsion.py b/scikit_mol/fingerprints/topologicaltorsion.py index f983bcc..0cd5d9e 100644 --- a/scikit_mol/fingerprints/topologicaltorsion.py +++ b/scikit_mol/fingerprints/topologicaltorsion.py @@ -10,62 +10,6 @@ from rdkit.Chem.rdFingerprintGenerator import GetTopologicalTorsionGenerator -class TopologicalTorsionFingerprintTransformerClassic(FpsTransformer): - def __init__( - self, - 
targetSize: int = 4, - fromAtoms=0, - ignoreAtoms=0, - atomInvariants=0, - includeChirality: bool = False, - nBitsPerEntry: int = 4, - fpSize=2048, - useCounts: bool = False, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - dtype: np.dtype = np.int8, - ): - super().__init__( - parallel=parallel, safe_inference_mode=safe_inference_mode, dtype=dtype - ) - self.targetSize = targetSize - self.fromAtoms = fromAtoms - self.ignoreAtoms = ignoreAtoms - self.atomInvariants = atomInvariants - self.includeChirality = includeChirality - self.nBitsPerEntry = nBitsPerEntry - self.fpSize = fpSize - self.useCounts = useCounts - - warn( - "TopologicalTorsionFingerprintTransformer will be replace by TopologicalTorsionFPGeneatorTransformer, due to changes in RDKit!", - DeprecationWarning, - ) - - def _mol2fp(self, mol): - if self.useCounts: - return rdMolDescriptors.GetHashedTopologicalTorsionFingerprint( - mol, - nBits=int(self.fpSize), - targetSize=int(self.targetSize), - fromAtoms=self.fromAtoms, - ignoreAtoms=self.ignoreAtoms, - atomInvariants=self.atomInvariants, - includeChirality=bool(self.includeChirality), - ) - else: - return rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect( - mol, - nBits=int(self.fpSize), - targetSize=int(self.targetSize), - fromAtoms=self.fromAtoms, - ignoreAtoms=self.ignoreAtoms, - atomInvariants=self.atomInvariants, - includeChirality=bool(self.includeChirality), - nBitsPerEntry=int(self.nBitsPerEntry), - ) - - class TopologicalTorsionFingerprintTransformer(FpsGeneratorTransformer): _regenerate_on_properties = ("fpSize", "includeChirality", "targetSize") @@ -119,58 +63,3 @@ def _transform_mol(self, mol) -> np.array: ignoreAtoms=self.ignoreAtoms, customAtomInvariants=self.atomInvariants, ) - - -class TopologicalTorsionFPGeneatorTransformer(FpsGeneratorTransformer): - _regenerate_on_properties = ("fpSize", "includeChirality", "targetSize") - - def __init__( - self, - targetSize: int = 4, - fromAtoms=None, - 
ignoreAtoms=None, - atomInvariants=None, - confId=-1, - includeChirality: bool = False, - fpSize: int = 2048, - useCounts: bool = False, - parallel: Union[bool, int] = False, - safe_inference_mode: bool = False, - ): - self._initializing = True - super().__init__(parallel=parallel, safe_inference_mode=safe_inference_mode) - self.fpSize = fpSize - self.includeChirality = includeChirality - self.targetSize = targetSize - - self.fromAtoms = fromAtoms - self.ignoreAtoms = ignoreAtoms - self.atomInvariants = atomInvariants - self.confId = confId - self.useCounts = useCounts - - self._generate_fp_generator() - delattr(self, "_initializing") - - def _generate_fp_generator(self): - self._fpgen = GetTopologicalTorsionGenerator( - torsionAtomCount=self.targetSize, - includeChirality=self.includeChirality, - fpSize=self.fpSize, - ) - - def _transform_mol(self, mol) -> np.array: - if self.useCounts: - return self._fpgen.GetCountFingerprintAsNumPy( - mol, - fromAtoms=self.fromAtoms, - ignoreAtoms=self.ignoreAtoms, - customAtomInvariants=self.atomInvariants, - ) - else: - return self._fpgen.GetFingerprintAsNumPy( - mol, - fromAtoms=self.fromAtoms, - ignoreAtoms=self.ignoreAtoms, - customAtomInvariants=self.atomInvariants, - ) diff --git a/tests/test_transformers.py b/tests/test_transformers.py index 9352441..a47b8bf 100644 --- a/tests/test_transformers.py +++ b/tests/test_transformers.py @@ -24,10 +24,6 @@ SECFingerprintTransformer, MHFingerprintTransformer, AvalonFingerprintTransformer, - MorganFPGeneratorTransformer, - RDKitFPGeneratorTransformer, - AtomPairFPGeneratorTransformer, - TopologicalTorsionFPGeneatorTransformer, ) from scikit_mol.fingerprints.baseclasses import BaseFpsTransformer @@ -50,13 +46,6 @@ def test_transformer(SLC6A4_subset): X_train, X_test = X_smiles[:128], X_smiles[128:] Y_train, Y_test = Y[:128], Y[128:] - (MorganFPGeneratorTransformer,) - ( - RDKitFPGeneratorTransformer, - AtomPairFPGeneratorTransformer, - TopologicalTorsionFPGeneatorTransformer, - ) - 
# run FP with default parameters except when useCounts can be given as an argument FP_dict = { "MACCSTransformer": [MACCSKeysFingerprintTransformer, None], @@ -79,19 +68,6 @@ def test_transformer(SLC6A4_subset): "SECFingerprintTransformer": [SECFingerprintTransformer, None], "MHFingerprintTransformer": [MHFingerprintTransformer, None], "AvalonFingerprintTransformer": [AvalonFingerprintTransformer, None], - "MorganFPGeneratorTransformer": [MorganFPGeneratorTransformer, True], - "MorganFPGeneratorTransformer": [MorganFPGeneratorTransformer, False], - "RDKitFPGeneratorTransformer": [RDKitFPGeneratorTransformer, None], - "AtomPairFPGeneratorTransformer": [AtomPairFPGeneratorTransformer, True], - "AtomPairFPGeneratorTransformer": [AtomPairFPGeneratorTransformer, False], - "TopologicalTorsionFPGeneatorTransformer": [ - TopologicalTorsionFPGeneatorTransformer, - True, - ], - "TopologicalTorsionFPGeneatorTransformer": [ - TopologicalTorsionFPGeneatorTransformer, - False, - ], } # fit on toy data and print train/test score if successful or collect the failed FP @@ -162,19 +138,6 @@ def test_transformer_pandas_output(SLC6A4_subset, pandas_output): "SECFingerprintTransformer": [SECFingerprintTransformer, None], "MHFingerprintTransformer": [MHFingerprintTransformer, None], "AvalonFingerprintTransformer": [AvalonFingerprintTransformer, None], - "MorganFPGeneratorTransformer": [MorganFPGeneratorTransformer, True], - "MorganFPGeneratorTransformer": [MorganFPGeneratorTransformer, False], - "RDKitFPGeneratorTransformer": [RDKitFPGeneratorTransformer, None], - "AtomPairFPGeneratorTransformer": [AtomPairFPGeneratorTransformer, True], - "AtomPairFPGeneratorTransformer": [AtomPairFPGeneratorTransformer, False], - "TopologicalTorsionFPGeneatorTransformer": [ - TopologicalTorsionFPGeneatorTransformer, - True, - ], - "TopologicalTorsionFPGeneatorTransformer": [ - TopologicalTorsionFPGeneatorTransformer, - False, - ], } # fit on toy data and check that the output is a pandas dataframe From 
f1bbc309e4804f0b8923f59aa9a684610b9a4f44 Mon Sep 17 00:00:00 2001 From: Esben Jannik Bjerrum Date: Sun, 24 Nov 2024 10:31:34 +0100 Subject: [PATCH 24/24] Updated and reran notebooks --- notebooks/01_basic_usage.ipynb | 564 +- notebooks/01_basic_usage.py | 19 +- notebooks/02_descriptor_transformer.ipynb | 78 +- notebooks/03_example_pipeline.ipynb | 130 +- notebooks/04_standardizer.ipynb | 112 +- notebooks/05_smiles_sanitaztion.ipynb | 76 +- notebooks/06_hyperparameter_tuning.ipynb | 257 +- notebooks/06_hyperparameter_tuning.py | 50 +- notebooks/07_parallel_transforms.ipynb | 4956 +++++++++++++++- notebooks/08_external_library_skopt.ipynb | 629 ++- notebooks/08_external_library_skopt.py | 36 +- ..._Usage_with_FingerPrint_Transformers.ipynb | 369 +- ...hod_Usage_with_FingerPrint_Transformers.py | 103 +- notebooks/10_pipeline_pandas_output.ipynb | 5024 +++++++++++++++-- notebooks/11_safe_inference.ipynb | 458 +- notebooks/11_safe_inference.py | 29 +- 16 files changed, 11552 insertions(+), 1338 deletions(-) diff --git a/notebooks/01_basic_usage.ipynb b/notebooks/01_basic_usage.ipynb index 4c62abe..e254859 100644 --- a/notebooks/01_basic_usage.ipynb +++ b/notebooks/01_basic_usage.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "8a3e313c", + "id": "aa079ac3", "metadata": {}, "source": [ "# Scikit-Mol\n", @@ -13,7 +13,7 @@ }, { "cell_type": "markdown", - "id": "7bcbed23", + "id": "76d24789", "metadata": {}, "source": [ "The transformer classes are easy to load, configure and use to process molecular information into vectorized formats using fingerprinters or collections of descriptors. For demonstration purposes, let's load a MorganTransformer, that can convert a list of RDKit molecular objects into a numpy array of morgan fingerprints. First create some molecules from SMILES strings." 
@@ -22,13 +22,13 @@ { "cell_type": "code", "execution_count": 1, - "id": "f8025236", + "id": "2c8cad03", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:09.802220Z", - "iopub.status.busy": "2024-04-12T12:10:09.802030Z", - "iopub.status.idle": "2024-04-12T12:10:09.808949Z", - "shell.execute_reply": "2024-04-12T12:10:09.808440Z" + "iopub.execute_input": "2024-11-24T09:27:16.292725Z", + "iopub.status.busy": "2024-11-24T09:27:16.292083Z", + "iopub.status.idle": "2024-11-24T09:27:16.306663Z", + "shell.execute_reply": "2024-11-24T09:27:16.304935Z" } }, "outputs": [], @@ -39,32 +39,34 @@ { "cell_type": "code", "execution_count": 2, - "id": "58a33f4d", + "id": "8d5b2333", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:09.811277Z", - "iopub.status.busy": "2024-04-12T12:10:09.811060Z", - "iopub.status.idle": "2024-04-12T12:10:09.936897Z", - "shell.execute_reply": "2024-04-12T12:10:09.936201Z" + "iopub.execute_input": "2024-11-24T09:27:16.313611Z", + "iopub.status.busy": "2024-11-24T09:27:16.313028Z", + "iopub.status.idle": "2024-11-24T09:27:16.510254Z", + "shell.execute_reply": "2024-11-24T09:27:16.509620Z" } }, "outputs": [], "source": [ "from rdkit import Chem\n", "\n", - "smiles_strings = [\"C12C([C@@H](OC(C=3C=CC(=CC3)F)C=4C=CC(=CC4)F)CC(N1CCCCCC5=CC=CC=C5)CC2)C(=O)OC\", \n", - "\"O(C1=NC=C2C(CN(CC2=C1)C)C3=CC=C(OC)C=C3)CCCN(CC)CC\",\n", - "\"O=S(=O)(N(CC=1C=CC2=CC=CC=C2C1)[C@@H]3CCNC3)C\",\n", - "\"C1(=C2C(CCCC2O)=NC=3C1=CC=CC3)NCC=4C=CC(=CC4)Cl\",\n", - "\"C1NC[C@@H](C1)[C@H](OC=2C=CC(=NC2C)OC)CC(C)C\",\n", - "\"FC(F)(F)C=1C(CN(C2CCNCC2)CC(CC)CC)=CC=CC1\"]\n", + "smiles_strings = [\n", + " \"C12C([C@@H](OC(C=3C=CC(=CC3)F)C=4C=CC(=CC4)F)CC(N1CCCCCC5=CC=CC=C5)CC2)C(=O)OC\",\n", + " \"O(C1=NC=C2C(CN(CC2=C1)C)C3=CC=C(OC)C=C3)CCCN(CC)CC\",\n", + " \"O=S(=O)(N(CC=1C=CC2=CC=CC=C2C1)[C@@H]3CCNC3)C\",\n", + " \"C1(=C2C(CCCC2O)=NC=3C1=CC=CC3)NCC=4C=CC(=CC4)Cl\",\n", + " \"C1NC[C@@H](C1)[C@H](OC=2C=CC(=NC2C)OC)CC(C)C\",\n", + " 
\"FC(F)(F)C=1C(CN(C2CCNCC2)CC(CC)CC)=CC=CC1\",\n", + "]\n", "\n", "mols = [Chem.MolFromSmiles(smiles) for smiles in smiles_strings]" ] }, { "cell_type": "markdown", - "id": "0228c878", + "id": "b9a588c7", "metadata": {}, "source": [ "Next we import the Morgan fingerprint transformer" @@ -73,13 +75,13 @@ { "cell_type": "code", "execution_count": 3, - "id": "cdb821a1", + "id": "0a625dda", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:09.939980Z", - "iopub.status.busy": "2024-04-12T12:10:09.939552Z", - "iopub.status.idle": "2024-04-12T12:10:10.505528Z", - "shell.execute_reply": "2024-04-12T12:10:10.504885Z" + "iopub.execute_input": "2024-11-24T09:27:16.513123Z", + "iopub.status.busy": "2024-11-24T09:27:16.512856Z", + "iopub.status.idle": "2024-11-24T09:27:17.089043Z", + "shell.execute_reply": "2024-11-24T09:27:17.088357Z" } }, "outputs": [ @@ -100,10 +102,10 @@ }, { "cell_type": "markdown", - "id": "e8ebae67", + "id": "355610d1", "metadata": {}, "source": [ - "It actually renders as a cute little interactive block in the Jupyter notebook and lists the options that are not the default values. If we print it, it also gives the information on the settings. \n", + "It actually renders as a cute little interactive block in the Jupyter notebook and lists the options that are not the default values. 
If we print it, it also gives the information on the settings.\n", "\n", "![An image of the interactive transformer widget](images/Transformer_Widget.jpg \"Transformer object rendering in Jupyter\")\n", "\n", @@ -113,20 +115,424 @@ { "cell_type": "code", "execution_count": 4, - "id": "c3a24f4e", + "id": "9a801d0f", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:10.508400Z", - "iopub.status.busy": "2024-04-12T12:10:10.508055Z", - "iopub.status.idle": "2024-04-12T12:10:10.514636Z", - "shell.execute_reply": "2024-04-12T12:10:10.514117Z" + "iopub.execute_input": "2024-11-24T09:27:17.091942Z", + "iopub.status.busy": "2024-11-24T09:27:17.091571Z", + "iopub.status.idle": "2024-11-24T09:27:17.098501Z", + "shell.execute_reply": "2024-11-24T09:27:17.097922Z" } }, "outputs": [ { "data": { "text/html": [ - "
MorganFingerprintTransformer(radius=3)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
MorganFingerprintTransformer(radius=3)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "MorganFingerprintTransformer(radius=3)" @@ -143,7 +549,7 @@ }, { "cell_type": "markdown", - "id": "c6e5de37", + "id": "556858b4", "metadata": {}, "source": [ "If we want to get all the settings explicitly, we can use the .get_params() method." @@ -152,22 +558,23 @@ { "cell_type": "code", "execution_count": 5, - "id": "112afff2", + "id": "500dc6f7", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:10.517110Z", - "iopub.status.busy": "2024-04-12T12:10:10.516867Z", - "iopub.status.idle": "2024-04-12T12:10:10.521207Z", - "shell.execute_reply": "2024-04-12T12:10:10.520689Z" + "iopub.execute_input": "2024-11-24T09:27:17.101153Z", + "iopub.status.busy": "2024-11-24T09:27:17.100929Z", + "iopub.status.idle": "2024-11-24T09:27:17.105319Z", + "shell.execute_reply": "2024-11-24T09:27:17.104586Z" } }, "outputs": [ { "data": { "text/plain": [ - "{'nBits': 2048,\n", + "{'fpSize': 2048,\n", " 'parallel': False,\n", " 'radius': 3,\n", + " 'safe_inference_mode': False,\n", " 'useBondTypes': True,\n", " 'useChirality': False,\n", " 'useCounts': False,\n", @@ -186,7 +593,7 @@ }, { "cell_type": "markdown", - "id": "45296da6", + "id": "d453fa33", "metadata": {}, "source": [ "The corresponding .set_params() method can be used to update the settings from options or from a dictionary (via ** unpackaging). The get_params and set_params methods are sometimes used by sklearn, as example hyperparameter search objects." 
@@ -195,13 +602,13 @@ { "cell_type": "code", "execution_count": 6, - "id": "4229d3d3", + "id": "3a27b07a", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:10.523546Z", - "iopub.status.busy": "2024-04-12T12:10:10.523347Z", - "iopub.status.idle": "2024-04-12T12:10:10.527067Z", - "shell.execute_reply": "2024-04-12T12:10:10.526450Z" + "iopub.execute_input": "2024-11-24T09:27:17.107710Z", + "iopub.status.busy": "2024-11-24T09:27:17.107495Z", + "iopub.status.idle": "2024-11-24T09:27:17.111268Z", + "shell.execute_reply": "2024-11-24T09:27:17.110754Z" } }, "outputs": [ @@ -209,20 +616,20 @@ "name": "stdout", "output_type": "stream", "text": [ - "MorganFingerprintTransformer(nBits=256)\n" + "MorganFingerprintTransformer(fpSize=256)\n" ] } ], "source": [ "parameters[\"radius\"] = 2\n", - "parameters[\"nBits\"] = 256\n", + "parameters[\"fpSize\"] = 256\n", "transformer.set_params(**parameters)\n", "print(transformer)" ] }, { "cell_type": "markdown", - "id": "1d38c224", + "id": "3dd372d3", "metadata": {}, "source": [ "Transformation is easy, simply use the .transform() method. For sklearn compatibility the scikit-learn transformers also have a .fit_transform() method, but it is usually not fitting anything." 
@@ -231,13 +638,13 @@ { "cell_type": "code", "execution_count": 7, - "id": "d2276e30", + "id": "0f141920", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:10.529451Z", - "iopub.status.busy": "2024-04-12T12:10:10.529229Z", - "iopub.status.idle": "2024-04-12T12:10:10.533310Z", - "shell.execute_reply": "2024-04-12T12:10:10.532819Z" + "iopub.execute_input": "2024-11-24T09:27:17.113572Z", + "iopub.status.busy": "2024-11-24T09:27:17.113344Z", + "iopub.status.idle": "2024-11-24T09:27:17.117356Z", + "shell.execute_reply": "2024-11-24T09:27:17.116845Z" } }, "outputs": [ @@ -245,7 +652,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "fps is a with shape (6, 256) and data type int8\n" + "fps is a with shape (6, 256) and data type uint8\n" ] } ], @@ -256,7 +663,7 @@ }, { "cell_type": "markdown", - "id": "666bf64b", + "id": "9cb75226", "metadata": {}, "source": [ "For sklearn compatibility, the transform function can be given a second parameter, usually representing the targets in the machine learning, but it is simply ignored most of the time" @@ -265,13 +672,13 @@ { "cell_type": "code", "execution_count": 8, - "id": "d3b01806", + "id": "481e527f", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:10.535668Z", - "iopub.status.busy": "2024-04-12T12:10:10.535455Z", - "iopub.status.idle": "2024-04-12T12:10:10.540535Z", - "shell.execute_reply": "2024-04-12T12:10:10.539917Z" + "iopub.execute_input": "2024-11-24T09:27:17.119855Z", + "iopub.status.busy": "2024-11-24T09:27:17.119584Z", + "iopub.status.idle": "2024-11-24T09:27:17.124520Z", + "shell.execute_reply": "2024-11-24T09:27:17.124025Z" } }, "outputs": [ @@ -283,7 +690,7 @@ " [1, 0, 0, ..., 0, 0, 0],\n", " [0, 0, 0, ..., 0, 0, 1],\n", " [1, 1, 0, ..., 0, 0, 0],\n", - " [1, 1, 0, ..., 0, 0, 0]], dtype=int8)" + " [1, 1, 0, ..., 0, 0, 0]], dtype=uint8)" ] }, "execution_count": 8, @@ -298,7 +705,7 @@ }, { "cell_type": "markdown", - "id": "1e5c385f", + "id": "500cec09", 
"metadata": {}, "source": [ "Sometimes we may want to transform SMILES into molecules, and scikit-mol also has a transformer for that. It simply takes a list of SMILES and produces a list of RDKit molecules, this may come in handy when building pipelines for machine learning models, as we will demo in another notebook." @@ -307,13 +714,13 @@ { "cell_type": "code", "execution_count": 9, - "id": "26081bb2", + "id": "7773a5a0", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:10.542876Z", - "iopub.status.busy": "2024-04-12T12:10:10.542661Z", - "iopub.status.idle": "2024-04-12T12:10:10.546656Z", - "shell.execute_reply": "2024-04-12T12:10:10.546143Z" + "iopub.execute_input": "2024-11-24T09:27:17.126934Z", + "iopub.status.busy": "2024-11-24T09:27:17.126713Z", + "iopub.status.idle": "2024-11-24T09:27:17.131063Z", + "shell.execute_reply": "2024-11-24T09:27:17.130539Z" } }, "outputs": [ @@ -327,6 +734,7 @@ ], "source": [ "from scikit_mol.conversions import SmilesToMolTransformer\n", + "\n", "smi2mol = SmilesToMolTransformer()\n", "print(smi2mol)" ] @@ -334,13 +742,13 @@ { "cell_type": "code", "execution_count": 10, - "id": "6b0e5f4a", + "id": "fa484453", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:10.548964Z", - "iopub.status.busy": "2024-04-12T12:10:10.548714Z", - "iopub.status.idle": "2024-04-12T12:10:10.553416Z", - "shell.execute_reply": "2024-04-12T12:10:10.552805Z" + "iopub.execute_input": "2024-11-24T09:27:17.133328Z", + "iopub.status.busy": "2024-11-24T09:27:17.133133Z", + "iopub.status.idle": "2024-11-24T09:27:17.137378Z", + "shell.execute_reply": "2024-11-24T09:27:17.136857Z" } }, "outputs": [ @@ -348,12 +756,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "[[]\n", - " []\n", - " []\n", - " []\n", - " []\n", - " []]\n" + "[[]\n", + " []\n", + " []\n", + " []\n", + " []\n", + " []]\n" ] } ], diff --git a/notebooks/01_basic_usage.py b/notebooks/01_basic_usage.py index d6ca01c..65631a3 100644 --- 
a/notebooks/01_basic_usage.py +++ b/notebooks/01_basic_usage.py @@ -29,12 +29,14 @@ # %% from rdkit import Chem -smiles_strings = ["C12C([C@@H](OC(C=3C=CC(=CC3)F)C=4C=CC(=CC4)F)CC(N1CCCCCC5=CC=CC=C5)CC2)C(=O)OC", -"O(C1=NC=C2C(CN(CC2=C1)C)C3=CC=C(OC)C=C3)CCCN(CC)CC", -"O=S(=O)(N(CC=1C=CC2=CC=CC=C2C1)[C@@H]3CCNC3)C", -"C1(=C2C(CCCC2O)=NC=3C1=CC=CC3)NCC=4C=CC(=CC4)Cl", -"C1NC[C@@H](C1)[C@H](OC=2C=CC(=NC2C)OC)CC(C)C", -"FC(F)(F)C=1C(CN(C2CCNCC2)CC(CC)CC)=CC=CC1"] +smiles_strings = [ + "C12C([C@@H](OC(C=3C=CC(=CC3)F)C=4C=CC(=CC4)F)CC(N1CCCCCC5=CC=CC=C5)CC2)C(=O)OC", + "O(C1=NC=C2C(CN(CC2=C1)C)C3=CC=C(OC)C=C3)CCCN(CC)CC", + "O=S(=O)(N(CC=1C=CC2=CC=CC=C2C1)[C@@H]3CCNC3)C", + "C1(=C2C(CCCC2O)=NC=3C1=CC=CC3)NCC=4C=CC(=CC4)Cl", + "C1NC[C@@H](C1)[C@H](OC=2C=CC(=NC2C)OC)CC(C)C", + "FC(F)(F)C=1C(CN(C2CCNCC2)CC(CC)CC)=CC=CC1", +] mols = [Chem.MolFromSmiles(smiles) for smiles in smiles_strings] @@ -48,7 +50,7 @@ print(transformer) # %% [markdown] -# It actually renders as a cute little interactive block in the Jupyter notebook and lists the options that are not the default values. If we print it, it also gives the information on the settings. +# It actually renders as a cute little interactive block in the Jupyter notebook and lists the options that are not the default values. If we print it, it also gives the information on the settings. 
# # ![An image of the interactive transformer widget](images/Transformer_Widget.jpg "Transformer object rendering in Jupyter") # @@ -69,7 +71,7 @@ # %% parameters["radius"] = 2 -parameters["nBits"] = 256 +parameters["fpSize"] = 256 transformer.set_params(**parameters) print(transformer) @@ -92,6 +94,7 @@ # %% from scikit_mol.conversions import SmilesToMolTransformer + smi2mol = SmilesToMolTransformer() print(smi2mol) diff --git a/notebooks/02_descriptor_transformer.ipynb b/notebooks/02_descriptor_transformer.ipynb index 268e235..43b3075 100644 --- a/notebooks/02_descriptor_transformer.ipynb +++ b/notebooks/02_descriptor_transformer.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "80ef57b6", + "id": "e3cf34ca", "metadata": {}, "source": [ "# Desc2DTransformer: RDKit descriptors transformer\n", @@ -13,13 +13,13 @@ { "cell_type": "code", "execution_count": 1, - "id": "51b69a7e", + "id": "81745b1f", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:11.861457Z", - "iopub.status.busy": "2024-04-12T12:10:11.861246Z", - "iopub.status.idle": "2024-04-12T12:10:12.860086Z", - "shell.execute_reply": "2024-04-12T12:10:12.859376Z" + "iopub.execute_input": "2024-11-24T09:27:18.828147Z", + "iopub.status.busy": "2024-11-24T09:27:18.827339Z", + "iopub.status.idle": "2024-11-24T09:27:19.887178Z", + "shell.execute_reply": "2024-11-24T09:27:19.886482Z" }, "lines_to_next_cell": 0 }, @@ -33,7 +33,7 @@ }, { "cell_type": "markdown", - "id": "1e253d9e", + "id": "2293e9e6", "metadata": {}, "source": [ "After instantiation of the descriptor transformer, we can query which descriptors it found available in the RDKit framework." 
@@ -42,13 +42,13 @@ { "cell_type": "code", "execution_count": 2, - "id": "f1d8fc37", + "id": "dd9a2ad0", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:12.863149Z", - "iopub.status.busy": "2024-04-12T12:10:12.862837Z", - "iopub.status.idle": "2024-04-12T12:10:12.868668Z", - "shell.execute_reply": "2024-04-12T12:10:12.868151Z" + "iopub.execute_input": "2024-11-24T09:27:19.890505Z", + "iopub.status.busy": "2024-11-24T09:27:19.889986Z", + "iopub.status.idle": "2024-11-24T09:27:19.896597Z", + "shell.execute_reply": "2024-11-24T09:27:19.896028Z" } }, "outputs": [ @@ -56,7 +56,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "There are 209 available descriptors\n", + "There are 210 available descriptors\n", "The first five descriptor names: ['MaxAbsEStateIndex', 'MaxEStateIndex', 'MinAbsEStateIndex', 'MinEStateIndex', 'qed']\n" ] } @@ -70,7 +70,7 @@ }, { "cell_type": "markdown", - "id": "5bb186e5", + "id": "110c00c0", "metadata": {}, "source": [ "We can transform molecules to their descriptor profiles" @@ -79,19 +79,31 @@ { "cell_type": "code", "execution_count": 3, - "id": "702168a7", + "id": "4431a910", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:12.871338Z", - "iopub.status.busy": "2024-04-12T12:10:12.870842Z", - "iopub.status.idle": "2024-04-12T12:10:13.031911Z", - "shell.execute_reply": "2024-04-12T12:10:13.031258Z" + "iopub.execute_input": "2024-11-24T09:27:19.899516Z", + "iopub.status.busy": "2024-11-24T09:27:19.899244Z", + "iopub.status.idle": "2024-11-24T09:27:20.125197Z", + "shell.execute_reply": "2024-11-24T09:27:20.123935Z" } }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:19] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:19] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:19] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:19] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:19] DEPRECATION WARNING: 
please use MorganGenerator\n", + "[10:27:19] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -110,7 +122,7 @@ }, { "cell_type": "markdown", - "id": "ea6f1325", + "id": "fdcb0698", "metadata": {}, "source": [ "If we only want some of them, this can be specified at object instantiation." @@ -119,13 +131,13 @@ { "cell_type": "code", "execution_count": 4, - "id": "ab72d398", + "id": "6caa9a54", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:13.034545Z", - "iopub.status.busy": "2024-04-12T12:10:13.034315Z", - "iopub.status.idle": "2024-04-12T12:10:13.038330Z", - "shell.execute_reply": "2024-04-12T12:10:13.037823Z" + "iopub.execute_input": "2024-11-24T09:27:20.131216Z", + "iopub.status.busy": "2024-11-24T09:27:20.130932Z", + "iopub.status.idle": "2024-11-24T09:27:20.135202Z", + "shell.execute_reply": "2024-11-24T09:27:20.134644Z" } }, "outputs": [ @@ -145,7 +157,7 @@ }, { "cell_type": "markdown", - "id": "c974320e", + "id": "52eaef77", "metadata": {}, "source": [ "If we want to update the selected descriptors on an already existing object, this can be done via the .set_params() method" @@ -154,13 +166,13 @@ { "cell_type": "code", "execution_count": 5, - "id": "4af5488d", + "id": "78fc5691", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:13.040697Z", - "iopub.status.busy": "2024-04-12T12:10:13.040482Z", - "iopub.status.idle": "2024-04-12T12:10:13.044948Z", - "shell.execute_reply": "2024-04-12T12:10:13.044445Z" + "iopub.execute_input": "2024-11-24T09:27:20.138477Z", + "iopub.status.busy": "2024-11-24T09:27:20.138022Z", + "iopub.status.idle": "2024-11-24T09:27:20.151081Z", + "shell.execute_reply": "2024-11-24T09:27:20.150278Z" } }, "outputs": [ @@ -180,7 +192,7 @@ { "cell_type": "code", "execution_count": null, - "id": "29367a7f", + "id": "4796a16e", "metadata": {}, "outputs": [], "source": [] diff --git a/notebooks/03_example_pipeline.ipynb b/notebooks/03_example_pipeline.ipynb index f2c9fbd..858eaf0 100644 --- a/notebooks/03_example_pipeline.ipynb +++ b/notebooks/03_example_pipeline.ipynb @@ -2,7 +2,7 @@ 
"cells": [ { "cell_type": "markdown", - "id": "f3f842d3", + "id": "e7c43298", "metadata": {}, "source": [ "# Pipelining the scikit-mol transformer\n", @@ -15,13 +15,13 @@ { "cell_type": "code", "execution_count": 1, - "id": "b92c5a96", + "id": "79139b10", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:14.474481Z", - "iopub.status.busy": "2024-04-12T12:10:14.474256Z", - "iopub.status.idle": "2024-04-12T12:10:15.156429Z", - "shell.execute_reply": "2024-04-12T12:10:15.155756Z" + "iopub.execute_input": "2024-11-24T09:27:21.863626Z", + "iopub.status.busy": "2024-11-24T09:27:21.863272Z", + "iopub.status.idle": "2024-11-24T09:27:22.718519Z", + "shell.execute_reply": "2024-11-24T09:27:22.717789Z" } }, "outputs": [], @@ -39,13 +39,13 @@ { "cell_type": "code", "execution_count": 2, - "id": "0d79bc45", + "id": "17a9cdd7", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:15.159285Z", - "iopub.status.busy": "2024-04-12T12:10:15.158830Z", - "iopub.status.idle": "2024-04-12T12:10:15.164599Z", - "shell.execute_reply": "2024-04-12T12:10:15.163957Z" + "iopub.execute_input": "2024-11-24T09:27:22.722219Z", + "iopub.status.busy": "2024-11-24T09:27:22.721369Z", + "iopub.status.idle": "2024-11-24T09:27:22.727326Z", + "shell.execute_reply": "2024-11-24T09:27:22.726709Z" }, "lines_to_next_cell": 0 }, @@ -57,7 +57,7 @@ }, { "cell_type": "markdown", - "id": "3569affd", + "id": "066131b8", "metadata": {}, "source": [ "The dataset is a subset of the SLC6A4 actives from ExcapeDB. 
They are hand selected to give test set performance despite the small size, and are provided as example data only and should not be used to build serious QSAR models.\n", @@ -68,13 +68,13 @@ { "cell_type": "code", "execution_count": 3, - "id": "594f45ba", + "id": "a3ec0a23", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:15.167046Z", - "iopub.status.busy": "2024-04-12T12:10:15.166800Z", - "iopub.status.idle": "2024-04-12T12:10:15.202826Z", - "shell.execute_reply": "2024-04-12T12:10:15.202180Z" + "iopub.execute_input": "2024-11-24T09:27:22.729951Z", + "iopub.status.busy": "2024-11-24T09:27:22.729732Z", + "iopub.status.idle": "2024-11-24T09:27:22.769704Z", + "shell.execute_reply": "2024-11-24T09:27:22.768854Z" }, "lines_to_next_cell": 0 }, @@ -94,7 +94,7 @@ }, { "cell_type": "markdown", - "id": "04af16b2", + "id": "eccaf4af", "metadata": {}, "source": [ "Then, let's import some tools from scikit-learn and two transformers from scikit-mol" @@ -103,13 +103,13 @@ { "cell_type": "code", "execution_count": 4, - "id": "ed19f736", + "id": "4eb8f0fa", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:15.205344Z", - "iopub.status.busy": "2024-04-12T12:10:15.205135Z", - "iopub.status.idle": "2024-04-12T12:10:15.593454Z", - "shell.execute_reply": "2024-04-12T12:10:15.592778Z" + "iopub.execute_input": "2024-11-24T09:27:22.772861Z", + "iopub.status.busy": "2024-11-24T09:27:22.772534Z", + "iopub.status.idle": "2024-11-24T09:27:23.182612Z", + "shell.execute_reply": "2024-11-24T09:27:23.181966Z" }, "lines_to_next_cell": 0 }, @@ -125,13 +125,13 @@ { "cell_type": "code", "execution_count": 5, - "id": "c4a255f4", + "id": "99edec0f", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:15.596462Z", - "iopub.status.busy": "2024-04-12T12:10:15.595952Z", - "iopub.status.idle": "2024-04-12T12:10:15.601122Z", - "shell.execute_reply": "2024-04-12T12:10:15.600608Z" + "iopub.execute_input": "2024-11-24T09:27:23.185612Z", + 
"iopub.status.busy": "2024-11-24T09:27:23.185269Z", + "iopub.status.idle": "2024-11-24T09:27:23.190844Z", + "shell.execute_reply": "2024-11-24T09:27:23.190290Z" } }, "outputs": [], @@ -141,7 +141,7 @@ }, { "cell_type": "markdown", - "id": "a088665f", + "id": "b8380817", "metadata": {}, "source": [ "After a split into train and test, we'll build the first pipeline" @@ -150,13 +150,13 @@ { "cell_type": "code", "execution_count": 6, - "id": "0ddbf668", + "id": "a27d6ff9", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:15.603558Z", - "iopub.status.busy": "2024-04-12T12:10:15.603329Z", - "iopub.status.idle": "2024-04-12T12:10:15.608527Z", - "shell.execute_reply": "2024-04-12T12:10:15.608014Z" + "iopub.execute_input": "2024-11-24T09:27:23.193426Z", + "iopub.status.busy": "2024-11-24T09:27:23.193188Z", + "iopub.status.idle": "2024-11-24T09:27:23.198881Z", + "shell.execute_reply": "2024-11-24T09:27:23.198225Z" } }, "outputs": [ @@ -176,7 +176,7 @@ }, { "cell_type": "markdown", - "id": "004b0d25", + "id": "6c12f9a8", "metadata": {}, "source": [ "We can do the fit by simply providing the list of RDKit molecule objects" @@ -185,13 +185,13 @@ { "cell_type": "code", "execution_count": 7, - "id": "231d0534", + "id": "634ca919", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:15.610826Z", - "iopub.status.busy": "2024-04-12T12:10:15.610623Z", - "iopub.status.idle": "2024-04-12T12:10:15.735973Z", - "shell.execute_reply": "2024-04-12T12:10:15.735308Z" + "iopub.execute_input": "2024-11-24T09:27:23.201230Z", + "iopub.status.busy": "2024-11-24T09:27:23.201013Z", + "iopub.status.idle": "2024-11-24T09:27:23.265644Z", + "shell.execute_reply": "2024-11-24T09:27:23.264698Z" }, "lines_to_next_cell": 0 }, @@ -213,7 +213,7 @@ }, { "cell_type": "markdown", - "id": "55915786", + "id": "8440cc5a", "metadata": {}, "source": [ "Nevermind the performance, or the exact value of the prediction, this is for demonstration purpures. 
We can easily predict on lists of molecules" @@ -222,13 +222,13 @@ { "cell_type": "code", "execution_count": 8, - "id": "5d1e9220", + "id": "f4431aab", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:15.738365Z", - "iopub.status.busy": "2024-04-12T12:10:15.738132Z", - "iopub.status.idle": "2024-04-12T12:10:15.744818Z", - "shell.execute_reply": "2024-04-12T12:10:15.744279Z" + "iopub.execute_input": "2024-11-24T09:27:23.269015Z", + "iopub.status.busy": "2024-11-24T09:27:23.268218Z", + "iopub.status.idle": "2024-11-24T09:27:23.280889Z", + "shell.execute_reply": "2024-11-24T09:27:23.279967Z" } }, "outputs": [ @@ -249,7 +249,7 @@ }, { "cell_type": "markdown", - "id": "07cf53ea", + "id": "a60e242b", "metadata": {}, "source": [ "We can also expand the already fitted pipeline, how about creating a pipeline that can predict directly from SMILES? With scikit-mol that is easy!" @@ -258,13 +258,13 @@ { "cell_type": "code", "execution_count": 9, - "id": "eb8ce486", + "id": "a908097d", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:15.747499Z", - "iopub.status.busy": "2024-04-12T12:10:15.746917Z", - "iopub.status.idle": "2024-04-12T12:10:15.754613Z", - "shell.execute_reply": "2024-04-12T12:10:15.754088Z" + "iopub.execute_input": "2024-11-24T09:27:23.284650Z", + "iopub.status.busy": "2024-11-24T09:27:23.283862Z", + "iopub.status.idle": "2024-11-24T09:27:23.298454Z", + "shell.execute_reply": "2024-11-24T09:27:23.297546Z" } }, "outputs": [ @@ -288,13 +288,13 @@ { "cell_type": "code", "execution_count": 10, - "id": "1444b605", + "id": "0124653c", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:15.757246Z", - "iopub.status.busy": "2024-04-12T12:10:15.756914Z", - "iopub.status.idle": "2024-04-12T12:10:15.761416Z", - "shell.execute_reply": "2024-04-12T12:10:15.760858Z" + "iopub.execute_input": "2024-11-24T09:27:23.302185Z", + "iopub.status.busy": "2024-11-24T09:27:23.301318Z", + "iopub.status.idle": 
"2024-11-24T09:27:23.307070Z", + "shell.execute_reply": "2024-11-24T09:27:23.306539Z" } }, "outputs": [ @@ -315,7 +315,7 @@ }, { "cell_type": "markdown", - "id": "90d7817b", + "id": "069e2d01", "metadata": {}, "source": [ "From here, the pipelines could be pickled, and later loaded for easy prediction on RDKit molecule objects or SMILES in other scripts. The transformation with the MorganTransformer will be the same as during fitting, so no need to remember if radius 2 or 3 was used for this or that model, as it is already in the pipeline itself. If we need to see the parameters for a particular pipeline of model, we can always get the non default settings via print or all settings with .get_params()." @@ -324,13 +324,13 @@ { "cell_type": "code", "execution_count": 11, - "id": "1eacda8f", + "id": "63c8ef60", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:15.765023Z", - "iopub.status.busy": "2024-04-12T12:10:15.764682Z", - "iopub.status.idle": "2024-04-12T12:10:15.772249Z", - "shell.execute_reply": "2024-04-12T12:10:15.771692Z" + "iopub.execute_input": "2024-11-24T09:27:23.309849Z", + "iopub.status.busy": "2024-11-24T09:27:23.309649Z", + "iopub.status.idle": "2024-11-24T09:27:23.317613Z", + "shell.execute_reply": "2024-11-24T09:27:23.316837Z" }, "lines_to_next_cell": 2 }, @@ -348,15 +348,17 @@ " 'pipe': Pipeline(steps=[('mol_transformer', MorganFingerprintTransformer()),\n", " ('Regressor', Ridge())]),\n", " 'smiles_transformer__parallel': False,\n", + " 'smiles_transformer__safe_inference_mode': False,\n", " 'pipe__memory': None,\n", " 'pipe__steps': [('mol_transformer', MorganFingerprintTransformer()),\n", " ('Regressor', Ridge())],\n", " 'pipe__verbose': False,\n", " 'pipe__mol_transformer': MorganFingerprintTransformer(),\n", " 'pipe__Regressor': Ridge(),\n", - " 'pipe__mol_transformer__nBits': 2048,\n", + " 'pipe__mol_transformer__fpSize': 2048,\n", " 'pipe__mol_transformer__parallel': False,\n", " 'pipe__mol_transformer__radius': 
2,\n", + " 'pipe__mol_transformer__safe_inference_mode': False,\n", " 'pipe__mol_transformer__useBondTypes': True,\n", " 'pipe__mol_transformer__useChirality': False,\n", " 'pipe__mol_transformer__useCounts': False,\n", diff --git a/notebooks/04_standardizer.ipynb b/notebooks/04_standardizer.ipynb index 1cf8175..ea59112 100644 --- a/notebooks/04_standardizer.ipynb +++ b/notebooks/04_standardizer.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "be682e9c", + "id": "095e3de9", "metadata": {}, "source": [ "# Molecule standardization\n", @@ -12,13 +12,13 @@ { "cell_type": "code", "execution_count": 1, - "id": "2aa91923", + "id": "d40bdabe", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:17.122184Z", - "iopub.status.busy": "2024-04-12T12:10:17.121928Z", - "iopub.status.idle": "2024-04-12T12:10:17.878455Z", - "shell.execute_reply": "2024-04-12T12:10:17.877770Z" + "iopub.execute_input": "2024-11-24T09:27:25.092168Z", + "iopub.status.busy": "2024-11-24T09:27:25.091775Z", + "iopub.status.idle": "2024-11-24T09:27:25.972589Z", + "shell.execute_reply": "2024-11-24T09:27:25.971827Z" } }, "outputs": [], @@ -33,7 +33,7 @@ }, { "cell_type": "markdown", - "id": "0fed8d0b", + "id": "1f739296", "metadata": {}, "source": [ "For demonstration let's create some molecules with different protonation states. The two first molecules are Benzoic acid and Sodium benzoate." 
@@ -42,20 +42,20 @@ { "cell_type": "code", "execution_count": 2, - "id": "934c031b", + "id": "5a45dfd5", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:17.881422Z", - "iopub.status.busy": "2024-04-12T12:10:17.881092Z", - "iopub.status.idle": "2024-04-12T12:10:17.889228Z", - "shell.execute_reply": "2024-04-12T12:10:17.888702Z" + "iopub.execute_input": "2024-11-24T09:27:25.975743Z", + "iopub.status.busy": "2024-11-24T09:27:25.975328Z", + "iopub.status.idle": "2024-11-24T09:27:25.984915Z", + "shell.execute_reply": "2024-11-24T09:27:25.984323Z" } }, "outputs": [ { "data": { "text/plain": [ - "array([], dtype=object)" + "array([], dtype=object)" ] }, "metadata": {}, @@ -64,7 +64,7 @@ { "data": { "text/plain": [ - "array([], dtype=object)" + "array([], dtype=object)" ] }, "metadata": {}, @@ -84,7 +84,7 @@ }, { "cell_type": "markdown", - "id": "e68b0eff", + "id": "1974e56a", "metadata": {}, "source": [ "We can simply use the transformer directly and get a list of standardized molecules" @@ -93,13 +93,13 @@ { "cell_type": "code", "execution_count": 3, - "id": "c18bbd3e", + "id": "d13141c6", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:17.891559Z", - "iopub.status.busy": "2024-04-12T12:10:17.891342Z", - "iopub.status.idle": "2024-04-12T12:10:17.906792Z", - "shell.execute_reply": "2024-04-12T12:10:17.906295Z" + "iopub.execute_input": "2024-11-24T09:27:25.987910Z", + "iopub.status.busy": "2024-11-24T09:27:25.987688Z", + "iopub.status.idle": "2024-11-24T09:27:26.003398Z", + "shell.execute_reply": "2024-11-24T09:27:26.002776Z" } }, "outputs": [ @@ -129,7 +129,7 @@ }, { "cell_type": "markdown", - "id": "da3fc3e2", + "id": "d268d331", "metadata": {}, "source": [ "Some of the molecules were desalted and neutralized.\n", @@ -140,13 +140,13 @@ { "cell_type": "code", "execution_count": 4, - "id": "d03be7dc", + "id": "a376a759", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:17.909344Z", - 
"iopub.status.busy": "2024-04-12T12:10:17.909132Z", - "iopub.status.idle": "2024-04-12T12:10:17.939678Z", - "shell.execute_reply": "2024-04-12T12:10:17.939104Z" + "iopub.execute_input": "2024-11-24T09:27:26.006132Z", + "iopub.status.busy": "2024-11-24T09:27:26.005882Z", + "iopub.status.idle": "2024-11-24T09:27:26.034347Z", + "shell.execute_reply": "2024-11-24T09:27:26.033821Z" }, "lines_to_next_cell": 2 }, @@ -158,30 +158,6 @@ "Predictions with no standardization: [0.51983795 0.61543701 2.31738354 3.01206795 3.44085399 4.37516731]\n", "Predictions with standardization: [0.51983795 0.51983795 2.06562022 3.01206795 3.95446692 4.92816899]\n" ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n" - ] } ], "source": [ @@ -202,7 +178,7 @@ }, { "cell_type": "markdown", - "id": "9cfab56c", + 
"id": "f0d071fb", "metadata": {}, "source": [ "As we can see, the predictions with the standardizer and without are different. The two first molecules were benzoic acid and sodium benzoate, which with the standardized pipeline is predicted as the same, but differently with the nonstandardized pipeline. Wheter we want to make the prediction on the parent compound, or predict the exact form, will of course depend on the use-case, but now there is at least a way to handle it easily in pipelined predictors.\n", @@ -215,13 +191,13 @@ { "cell_type": "code", "execution_count": 5, - "id": "e5f8495f", + "id": "50f71bca", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:17.942493Z", - "iopub.status.busy": "2024-04-12T12:10:17.942248Z", - "iopub.status.idle": "2024-04-12T12:10:17.961792Z", - "shell.execute_reply": "2024-04-12T12:10:17.961206Z" + "iopub.execute_input": "2024-11-24T09:27:26.037572Z", + "iopub.status.busy": "2024-11-24T09:27:26.036950Z", + "iopub.status.idle": "2024-11-24T09:27:26.056194Z", + "shell.execute_reply": "2024-11-24T09:27:26.055013Z" } }, "outputs": [ @@ -232,30 +208,6 @@ "Predictions with no standardization: [0.07445775 0.96053374 2.05993278 3.00857908 3.96365443 4.93284221]\n", "Predictions with standardization: [0.07445775 0.07445775 2.32132164 3.00857908 2.68502208 4.30275549]\n" ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use 
MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n", - "[19:07:06] DEPRECATION WARNING: please use MorganGenerator\n" - ] } ], "source": [ diff --git a/notebooks/05_smiles_sanitaztion.ipynb b/notebooks/05_smiles_sanitaztion.ipynb index 70eed7e..d59c830 100644 --- a/notebooks/05_smiles_sanitaztion.ipynb +++ b/notebooks/05_smiles_sanitaztion.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "b6fe4dbd", + "id": "9b787560", "metadata": {}, "source": [ "# SMILES sanitation\n", @@ -12,13 +12,13 @@ { "cell_type": "code", "execution_count": 1, - "id": "c46f479f", + "id": "612aa974", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:19.504461Z", - "iopub.status.busy": "2024-04-12T12:10:19.504268Z", - "iopub.status.idle": "2024-04-12T12:10:19.893131Z", - "shell.execute_reply": "2024-04-12T12:10:19.892459Z" + "iopub.execute_input": "2024-11-24T09:27:27.545695Z", + "iopub.status.busy": "2024-11-24T09:27:27.545293Z", + "iopub.status.idle": "2024-11-24T09:27:28.079174Z", + "shell.execute_reply": "2024-11-24T09:27:28.078490Z" } }, "outputs": [], @@ -33,7 +33,7 @@ }, { "cell_type": "markdown", - "id": "5db46e76", + "id": "0f957a69", "metadata": {}, "source": [ "Now, this example dataset contain all sanitizable SMILES, so for demonstration purposes, we will corrupt one of them" @@ -42,13 +42,13 @@ { "cell_type": "code", "execution_count": 2, - "id": "a7bc5ff6", + "id": "b09cfd6b", "metadata": { "execution": { - "iopub.execute_input": 
"2024-04-12T12:10:19.896100Z", - "iopub.status.busy": "2024-04-12T12:10:19.895854Z", - "iopub.status.idle": "2024-04-12T12:10:19.899441Z", - "shell.execute_reply": "2024-04-12T12:10:19.898958Z" + "iopub.execute_input": "2024-11-24T09:27:28.082222Z", + "iopub.status.busy": "2024-11-24T09:27:28.081921Z", + "iopub.status.idle": "2024-11-24T09:27:28.086003Z", + "shell.execute_reply": "2024-11-24T09:27:28.085450Z" } }, "outputs": [], @@ -59,13 +59,13 @@ { "cell_type": "code", "execution_count": 3, - "id": "a792f370", + "id": "e20fb5cc", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:19.901832Z", - "iopub.status.busy": "2024-04-12T12:10:19.901618Z", - "iopub.status.idle": "2024-04-12T12:10:19.939335Z", - "shell.execute_reply": "2024-04-12T12:10:19.938810Z" + "iopub.execute_input": "2024-11-24T09:27:28.088449Z", + "iopub.status.busy": "2024-11-24T09:27:28.088211Z", + "iopub.status.idle": "2024-11-24T09:27:28.130818Z", + "shell.execute_reply": "2024-11-24T09:27:28.130102Z" }, "lines_to_next_cell": 2 }, @@ -81,7 +81,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[14:10:19] Explicit valence for atom # 1 N, 4, is greater than permitted\n" + "[10:27:28] Explicit valence for atom # 1 N, 4, is greater than permitted\n" ] } ], @@ -93,7 +93,7 @@ }, { "cell_type": "markdown", - "id": "54341ece", + "id": "f8dccd93", "metadata": {}, "source": [ "If we use these SMILES for the scikit-learn pipeline, we would face an error, so we need to check and clean the dataset first. The CheckSmilesSanitation can help us with that." 
@@ -102,13 +102,13 @@ { "cell_type": "code", "execution_count": 4, - "id": "849b643f", + "id": "3dbd50b3", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:19.942039Z", - "iopub.status.busy": "2024-04-12T12:10:19.941833Z", - "iopub.status.idle": "2024-04-12T12:10:19.981190Z", - "shell.execute_reply": "2024-04-12T12:10:19.980569Z" + "iopub.execute_input": "2024-11-24T09:27:28.133745Z", + "iopub.status.busy": "2024-11-24T09:27:28.133507Z", + "iopub.status.idle": "2024-11-24T09:27:28.508377Z", + "shell.execute_reply": "2024-11-24T09:27:28.507130Z" } }, "outputs": [ @@ -123,7 +123,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[14:10:19] Explicit valence for atom # 1 N, 4, is greater than permitted\n" + "[10:27:28] Explicit valence for atom # 1 N, 4, is greater than permitted\n" ] } ], @@ -136,7 +136,7 @@ }, { "cell_type": "markdown", - "id": "5e410e4c", + "id": "c888d7da", "metadata": {}, "source": [ "Now the smiles_list_valid should be all valid and the y_values filtered as well. 
Errors are returned, but also accesible after the call to .sanitize() in the .errors property" @@ -145,13 +145,13 @@ { "cell_type": "code", "execution_count": 5, - "id": "2145530c", + "id": "5af5ea3d", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:19.983672Z", - "iopub.status.busy": "2024-04-12T12:10:19.983440Z", - "iopub.status.idle": "2024-04-12T12:10:19.995617Z", - "shell.execute_reply": "2024-04-12T12:10:19.995060Z" + "iopub.execute_input": "2024-11-24T09:27:28.511261Z", + "iopub.status.busy": "2024-11-24T09:27:28.510945Z", + "iopub.status.idle": "2024-11-24T09:27:28.522024Z", + "shell.execute_reply": "2024-11-24T09:27:28.521232Z" } }, "outputs": [ @@ -206,7 +206,7 @@ }, { "cell_type": "markdown", - "id": "c9906a45", + "id": "c2ce2677", "metadata": {}, "source": [ "The checker can also be used only on X" @@ -215,13 +215,13 @@ { "cell_type": "code", "execution_count": 6, - "id": "d5fd425e", + "id": "84db07cc", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:19.998113Z", - "iopub.status.busy": "2024-04-12T12:10:19.997903Z", - "iopub.status.idle": "2024-04-12T12:10:20.040005Z", - "shell.execute_reply": "2024-04-12T12:10:20.039381Z" + "iopub.execute_input": "2024-11-24T09:27:28.524982Z", + "iopub.status.busy": "2024-11-24T09:27:28.524717Z", + "iopub.status.idle": "2024-11-24T09:27:28.569119Z", + "shell.execute_reply": "2024-11-24T09:27:28.568473Z" } }, "outputs": [ @@ -236,7 +236,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[14:10:19] Explicit valence for atom # 1 N, 4, is greater than permitted\n" + "[10:27:28] Explicit valence for atom # 1 N, 4, is greater than permitted\n" ] }, { diff --git a/notebooks/06_hyperparameter_tuning.ipynb b/notebooks/06_hyperparameter_tuning.ipynb index d15d9fe..9afcc21 100644 --- a/notebooks/06_hyperparameter_tuning.ipynb +++ b/notebooks/06_hyperparameter_tuning.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "9d600dd1", + "id": "f0b0cc54", 
"metadata": {}, "source": [ "# Full example: Hyperparameter tuning\n", @@ -13,13 +13,13 @@ { "cell_type": "code", "execution_count": 1, - "id": "7df4793c", + "id": "51aa3d62", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:21.479028Z", - "iopub.status.busy": "2024-04-12T12:10:21.478831Z", - "iopub.status.idle": "2024-04-12T12:10:22.543557Z", - "shell.execute_reply": "2024-04-12T12:10:22.542929Z" + "iopub.execute_input": "2024-11-24T09:27:30.230310Z", + "iopub.status.busy": "2024-11-24T09:27:30.230076Z", + "iopub.status.idle": "2024-11-24T09:27:31.452867Z", + "shell.execute_reply": "2024-11-24T09:27:31.452127Z" } }, "outputs": [], @@ -41,7 +41,7 @@ }, { "cell_type": "markdown", - "id": "8acee1c3", + "id": "e07990d0", "metadata": {}, "source": [ "We will need some data. There is a dataset with the SLC6A4 active compounds from ExcapeDB on Zenodo. The scikit-mol project uses a subset of this for testing, and the samples there has been specially selected to give good results in testing (it should therefore be used for any production modelling). If full_set is false, the fast subset will be used, and otherwise the full dataset will be downloaded if needed." 
@@ -50,13 +50,13 @@ { "cell_type": "code", "execution_count": 2, - "id": "45a8ebf1", + "id": "adbc1868", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:22.546506Z", - "iopub.status.busy": "2024-04-12T12:10:22.546207Z", - "iopub.status.idle": "2024-04-12T12:10:22.550011Z", - "shell.execute_reply": "2024-04-12T12:10:22.549526Z" + "iopub.execute_input": "2024-11-24T09:27:31.455770Z", + "iopub.status.busy": "2024-11-24T09:27:31.455436Z", + "iopub.status.idle": "2024-11-24T09:27:31.459245Z", + "shell.execute_reply": "2024-11-24T09:27:31.458654Z" } }, "outputs": [], @@ -67,15 +67,16 @@ " csv_file = \"SLC6A4_active_excape_export.csv\"\n", " if not os.path.exists(csv_file):\n", " import urllib.request\n", + "\n", " url = \"https://ndownloader.figshare.com/files/25747817\"\n", " urllib.request.urlretrieve(url, csv_file)\n", "else:\n", - " csv_file = '../tests/data/SLC6A4_active_excapedb_subset.csv'" + " csv_file = \"../tests/data/SLC6A4_active_excapedb_subset.csv\"" ] }, { "cell_type": "markdown", - "id": "f3c108d4", + "id": "d2ce3c7f", "metadata": {}, "source": [ "The CSV data is loaded into a Pandas dataframe and the PandasTools utility from RDKit is used to add a column with RDKit molecules" @@ -84,13 +85,13 @@ { "cell_type": "code", "execution_count": 3, - "id": "08c233a7", + "id": "9a283f12", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:22.552652Z", - "iopub.status.busy": "2024-04-12T12:10:22.552186Z", - "iopub.status.idle": "2024-04-12T12:10:22.591597Z", - "shell.execute_reply": "2024-04-12T12:10:22.591017Z" + "iopub.execute_input": "2024-11-24T09:27:31.461622Z", + "iopub.status.busy": "2024-11-24T09:27:31.461384Z", + "iopub.status.idle": "2024-11-24T09:27:31.500359Z", + "shell.execute_reply": "2024-11-24T09:27:31.499764Z" } }, "outputs": [ @@ -112,7 +113,7 @@ }, { "cell_type": "markdown", - "id": "e1828bee", + "id": "e245e989", "metadata": {}, "source": [ "We use the train_test_split to, well, split the dataframe's 
molecule columns and pXC50 column into lists for train and testing" @@ -121,25 +122,27 @@ { "cell_type": "code", "execution_count": 4, - "id": "5363d05a", + "id": "303b83de", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:22.594077Z", - "iopub.status.busy": "2024-04-12T12:10:22.593866Z", - "iopub.status.idle": "2024-04-12T12:10:22.598774Z", - "shell.execute_reply": "2024-04-12T12:10:22.598212Z" + "iopub.execute_input": "2024-11-24T09:27:31.502982Z", + "iopub.status.busy": "2024-11-24T09:27:31.502779Z", + "iopub.status.idle": "2024-11-24T09:27:31.507447Z", + "shell.execute_reply": "2024-11-24T09:27:31.506962Z" }, "lines_to_next_cell": 2 }, "outputs": [], "source": [ "\n", - "mol_list_train, mol_list_test, y_train, y_test = train_test_split(data.ROMol, data.pXC50, random_state=42)" + "mol_list_train, mol_list_test, y_train, y_test = train_test_split(\n", + " data.ROMol, data.pXC50, random_state=42\n", + ")" ] }, { "cell_type": "markdown", - "id": "bf9e8c8d", + "id": "56247c3b", "metadata": {}, "source": [ "We will standardize the molecules before modelling. This is best done before the hyperparameter optimizatiion of the featurization with the scikit-mol transformer and regression modelling, as the standardization is otherwise done for every loop in the hyperparameter optimization, which will make it take longer time." 
@@ -148,18 +151,18 @@ { "cell_type": "code", "execution_count": 5, - "id": "885daf12", + "id": "1383d0fc", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:22.601148Z", - "iopub.status.busy": "2024-04-12T12:10:22.600938Z", - "iopub.status.idle": "2024-04-12T12:10:22.967869Z", - "shell.execute_reply": "2024-04-12T12:10:22.967178Z" + "iopub.execute_input": "2024-11-24T09:27:31.509953Z", + "iopub.status.busy": "2024-11-24T09:27:31.509731Z", + "iopub.status.idle": "2024-11-24T09:27:31.830576Z", + "shell.execute_reply": "2024-11-24T09:27:31.829874Z" } }, "outputs": [], "source": [ - "# Probably the recommended way would be to prestandardize the data if there's no changes to the transformer, \n", + "# Probably the recommended way would be to prestandardize the data if there's no changes to the transformer,\n", "# and then add the standardizer in the inference pipeline.\n", "\n", "from scikit_mol.standardizer import Standardizer\n", @@ -170,7 +173,7 @@ }, { "cell_type": "markdown", - "id": "a81ae4c4", + "id": "0775d395", "metadata": {}, "source": [ "A simple pipeline with a MorganTransformer and a Ridge() regression for demonstration." 
@@ -179,14 +182,15 @@ { "cell_type": "code", "execution_count": 6, - "id": "8fd14250", + "id": "51c74711", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:22.970766Z", - "iopub.status.busy": "2024-04-12T12:10:22.970539Z", - "iopub.status.idle": "2024-04-12T12:10:22.973715Z", - "shell.execute_reply": "2024-04-12T12:10:22.973234Z" - } + "iopub.execute_input": "2024-11-24T09:27:31.833379Z", + "iopub.status.busy": "2024-11-24T09:27:31.833155Z", + "iopub.status.idle": "2024-11-24T09:27:31.836541Z", + "shell.execute_reply": "2024-11-24T09:27:31.835939Z" + }, + "lines_to_next_cell": 2 }, "outputs": [], "source": [ @@ -194,13 +198,12 @@ "moltransformer = MorganFingerprintTransformer()\n", "regressor = Ridge()\n", "\n", - "optimization_pipe = make_pipeline(moltransformer, regressor)\n", - "\n" + "optimization_pipe = make_pipeline(moltransformer, regressor)" ] }, { "cell_type": "markdown", - "id": "ad2752d0", + "id": "8221a682", "metadata": {}, "source": [ "For hyperparameter optimization we import the RandomizedSearchCV class from Scikit-Learn. It will try different random combinations of settings and use internal cross-validation to find the best model. In the end, it will fit the best found parameters on the full set. We also import loguniform, to get a better sampling of some of the parameters." 
@@ -209,26 +212,27 @@ { "cell_type": "code", "execution_count": 7, - "id": "fa082078", + "id": "4c6b833f", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:22.976061Z", - "iopub.status.busy": "2024-04-12T12:10:22.975846Z", - "iopub.status.idle": "2024-04-12T12:10:22.978721Z", - "shell.execute_reply": "2024-04-12T12:10:22.978155Z" + "iopub.execute_input": "2024-11-24T09:27:31.838854Z", + "iopub.status.busy": "2024-11-24T09:27:31.838668Z", + "iopub.status.idle": "2024-11-24T09:27:31.841636Z", + "shell.execute_reply": "2024-11-24T09:27:31.841130Z" }, "title": "Now hyperparameter tuning" }, "outputs": [], "source": [ "from sklearn.model_selection import RandomizedSearchCV\n", - "#from sklearn.utils.fixes import loguniform\n", + "\n", + "# from sklearn.utils.fixes import loguniform\n", "from scipy.stats import loguniform" ] }, { "cell_type": "markdown", - "id": "fa2a316a", + "id": "6b9d4576", "metadata": {}, "source": [ "With the pipelines, getting the names of the parameters to tune is a bit more tricky, as they are concatenations of the name of the step and the parameter with double underscores in between. We can get the available parameters from the pipeline with the get_params() method, and select the parameters we want to change from there." @@ -237,13 +241,13 @@ { "cell_type": "code", "execution_count": 8, - "id": "046e24d3", + "id": "0af1003b", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:22.981002Z", - "iopub.status.busy": "2024-04-12T12:10:22.980797Z", - "iopub.status.idle": "2024-04-12T12:10:22.986820Z", - "shell.execute_reply": "2024-04-12T12:10:22.986344Z" + "iopub.execute_input": "2024-11-24T09:27:31.843922Z", + "iopub.status.busy": "2024-11-24T09:27:31.843728Z", + "iopub.status.idle": "2024-11-24T09:27:31.849777Z", + "shell.execute_reply": "2024-11-24T09:27:31.849273Z" }, "title": "Which keys do we have?" 
}, @@ -251,7 +255,7 @@ { "data": { "text/plain": [ - "dict_keys(['memory', 'steps', 'verbose', 'morganfingerprinttransformer', 'ridge', 'morganfingerprinttransformer__nBits', 'morganfingerprinttransformer__parallel', 'morganfingerprinttransformer__radius', 'morganfingerprinttransformer__useBondTypes', 'morganfingerprinttransformer__useChirality', 'morganfingerprinttransformer__useCounts', 'morganfingerprinttransformer__useFeatures', 'ridge__alpha', 'ridge__copy_X', 'ridge__fit_intercept', 'ridge__max_iter', 'ridge__positive', 'ridge__random_state', 'ridge__solver', 'ridge__tol'])" + "dict_keys(['memory', 'steps', 'verbose', 'morganfingerprinttransformer', 'ridge', 'morganfingerprinttransformer__fpSize', 'morganfingerprinttransformer__parallel', 'morganfingerprinttransformer__radius', 'morganfingerprinttransformer__safe_inference_mode', 'morganfingerprinttransformer__useBondTypes', 'morganfingerprinttransformer__useChirality', 'morganfingerprinttransformer__useCounts', 'morganfingerprinttransformer__useFeatures', 'ridge__alpha', 'ridge__copy_X', 'ridge__fit_intercept', 'ridge__max_iter', 'ridge__positive', 'ridge__random_state', 'ridge__solver', 'ridge__tol'])" ] }, "execution_count": 8, @@ -266,7 +270,7 @@ }, { "cell_type": "markdown", - "id": "cd7c2297", + "id": "cb0db6a5", "metadata": {}, "source": [ "We will tune the regularization strength of the Ridge regressor, and try out different parameters for the Morgan fingerprint, namely the number of bits, the radius of the fingerprint, wheter to use counts or bits and features." 
@@ -275,30 +279,33 @@ { "cell_type": "code", "execution_count": 9, - "id": "cf2c45d7", + "id": "c2d541b3", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:22.989460Z", - "iopub.status.busy": "2024-04-12T12:10:22.988970Z", - "iopub.status.idle": "2024-04-12T12:10:22.993117Z", - "shell.execute_reply": "2024-04-12T12:10:22.992622Z" - }, - "lines_to_next_cell": 1 + "iopub.execute_input": "2024-11-24T09:27:31.852166Z", + "iopub.status.busy": "2024-11-24T09:27:31.851946Z", + "iopub.status.idle": "2024-11-24T09:27:31.856126Z", + "shell.execute_reply": "2024-11-24T09:27:31.855622Z" + } }, "outputs": [], "source": [ "\n", - "param_dist = {'ridge__alpha': loguniform(1e-2, 1e3),\n", - " \"morganfingerprinttransformer__nBits\": [256,512,1024,2048,4096],\n", - " 'morganfingerprinttransformer__radius':[1,2,3,4],\n", - " 'morganfingerprinttransformer__useCounts': [True,False],\n", - " 'morganfingerprinttransformer__useFeatures':[True,False]}" + "param_dist = {\n", + " \"ridge__alpha\": loguniform(1e-2, 1e3),\n", + " \"morganfingerprinttransformer__fpSize\": [256, 512, 1024, 2048, 4096],\n", + " \"morganfingerprinttransformer__radius\": [1, 2, 3, 4],\n", + " \"morganfingerprinttransformer__useCounts\": [True, False],\n", + " \"morganfingerprinttransformer__useFeatures\": [True, False],\n", + "}" ] }, { "cell_type": "markdown", - "id": "d61fc18c", - "metadata": {}, + "id": "2157d154", + "metadata": { + "lines_to_next_cell": 2 + }, "source": [ "The report function was taken from [this example](https://scikit-learn.org/stable/auto_examples/model_selection/plot_randomized_search.html#sphx-glr-auto-examples-model-selection-plot-randomized-search-py) from the scikit learn documentation." 
] @@ -306,13 +313,13 @@ { "cell_type": "code", "execution_count": 10, - "id": "fbb2cacd", + "id": "f2c91783", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:22.995477Z", - "iopub.status.busy": "2024-04-12T12:10:22.995281Z", - "iopub.status.idle": "2024-04-12T12:10:22.999317Z", - "shell.execute_reply": "2024-04-12T12:10:22.998769Z" + "iopub.execute_input": "2024-11-24T09:27:31.858429Z", + "iopub.status.busy": "2024-11-24T09:27:31.858216Z", + "iopub.status.idle": "2024-11-24T09:27:31.862461Z", + "shell.execute_reply": "2024-11-24T09:27:31.861795Z" }, "title": "From https://scikit-learn.org/stable/auto_examples/model_selection/plot_randomized_search.html#sphx-glr-auto-examples-model-selection-plot-randomized-search-py" }, @@ -336,7 +343,7 @@ }, { "cell_type": "markdown", - "id": "ad49376f", + "id": "469691f4", "metadata": {}, "source": [ "We will do 25 tries of random parameter sets, and see what comes out as the best one. If you are using the small example dataset, this should take some second, but may take some minutes with the full set." 
@@ -345,13 +352,13 @@ { "cell_type": "code", "execution_count": 11, - "id": "bc66efa3", + "id": "79a70a0f", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:23.001598Z", - "iopub.status.busy": "2024-04-12T12:10:23.001400Z", - "iopub.status.idle": "2024-04-12T12:10:27.149245Z", - "shell.execute_reply": "2024-04-12T12:10:27.148640Z" + "iopub.execute_input": "2024-11-24T09:27:31.864936Z", + "iopub.status.busy": "2024-11-24T09:27:31.864708Z", + "iopub.status.idle": "2024-11-24T09:27:36.221386Z", + "shell.execute_reply": "2024-11-24T09:27:36.220369Z" } }, "outputs": [ @@ -359,7 +366,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Runtime: 4.14 for 25 iterations)\n" + "Runtime: 4.35 for 25 iterations)\n" ] } ], @@ -372,19 +379,19 @@ "random_search.fit(mol_list_std_train, y_train.values)\n", "t1 = time()\n", "\n", - "print(f'Runtime: {t1-t0:0.2F} for {n_iter_search} iterations)')" + "print(f\"Runtime: {t1-t0:0.2F} for {n_iter_search} iterations)\")" ] }, { "cell_type": "code", "execution_count": 12, - "id": "b2b3d623", + "id": "b6160cb3", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:27.153307Z", - "iopub.status.busy": "2024-04-12T12:10:27.152284Z", - "iopub.status.idle": "2024-04-12T12:10:27.157929Z", - "shell.execute_reply": "2024-04-12T12:10:27.157394Z" + "iopub.execute_input": "2024-11-24T09:27:36.224876Z", + "iopub.status.busy": "2024-11-24T09:27:36.224667Z", + "iopub.status.idle": "2024-11-24T09:27:36.232647Z", + "shell.execute_reply": "2024-11-24T09:27:36.231571Z" }, "lines_to_next_cell": 0 }, @@ -394,16 +401,16 @@ "output_type": "stream", "text": [ "Model with rank: 1\n", - "Mean validation score: 0.539 (std: 0.090)\n", - "Parameters: {'morganfingerprinttransformer__nBits': 2048, 'morganfingerprinttransformer__radius': 1, 'morganfingerprinttransformer__useCounts': False, 'morganfingerprinttransformer__useFeatures': False, 'ridge__alpha': 10.016632822744322}\n", + "Mean validation score: 0.563 (std: 
0.115)\n", + "Parameters: {'morganfingerprinttransformer__fpSize': 1024, 'morganfingerprinttransformer__radius': 2, 'morganfingerprinttransformer__useCounts': False, 'morganfingerprinttransformer__useFeatures': False, 'ridge__alpha': 6.855244257973563}\n", "\n", "Model with rank: 2\n", - "Mean validation score: 0.534 (std: 0.145)\n", - "Parameters: {'morganfingerprinttransformer__nBits': 2048, 'morganfingerprinttransformer__radius': 3, 'morganfingerprinttransformer__useCounts': False, 'morganfingerprinttransformer__useFeatures': False, 'ridge__alpha': 0.7814453905088184}\n", + "Mean validation score: 0.527 (std: 0.086)\n", + "Parameters: {'morganfingerprinttransformer__fpSize': 512, 'morganfingerprinttransformer__radius': 2, 'morganfingerprinttransformer__useCounts': False, 'morganfingerprinttransformer__useFeatures': False, 'ridge__alpha': 13.611425709525077}\n", "\n", "Model with rank: 3\n", - "Mean validation score: 0.526 (std: 0.090)\n", - "Parameters: {'morganfingerprinttransformer__nBits': 4096, 'morganfingerprinttransformer__radius': 1, 'morganfingerprinttransformer__useCounts': False, 'morganfingerprinttransformer__useFeatures': False, 'ridge__alpha': 11.295262712617353}\n", + "Mean validation score: 0.466 (std: 0.149)\n", + "Parameters: {'morganfingerprinttransformer__fpSize': 2048, 'morganfingerprinttransformer__radius': 4, 'morganfingerprinttransformer__useCounts': False, 'morganfingerprinttransformer__useFeatures': True, 'ridge__alpha': 1.383163758398022}\n", "\n" ] } @@ -414,7 +421,7 @@ }, { "cell_type": "markdown", - "id": "219e3e32", + "id": "9a2ea219", "metadata": {}, "source": [ "It can be interesting to see what combinations of hyperparameters gave good results for the cross-validation. Usually the number of bits are in the high end and radius is 2 to 4. But this can vary a bit, as we do a small number of tries for this demo. More extended search with more iterations could maybe find even better and more consistent. 
solutions" @@ -422,7 +429,7 @@ }, { "cell_type": "markdown", - "id": "630772af", + "id": "6cf91582", "metadata": {}, "source": [ "Let's see if standardization had any influence on this dataset. We build an inference pipeline that includes the standardization object and the best estimator, and run the best estimator directly on the list of test molecules" @@ -431,13 +438,13 @@ { "cell_type": "code", "execution_count": 13, - "id": "cb369a0e", + "id": "4daaf106", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:27.161728Z", - "iopub.status.busy": "2024-04-12T12:10:27.160746Z", - "iopub.status.idle": "2024-04-12T12:10:27.316434Z", - "shell.execute_reply": "2024-04-12T12:10:27.315830Z" + "iopub.execute_input": "2024-11-24T09:27:36.236805Z", + "iopub.status.busy": "2024-11-24T09:27:36.235794Z", + "iopub.status.idle": "2024-11-24T09:27:36.394539Z", + "shell.execute_reply": "2024-11-24T09:27:36.393590Z" } }, "outputs": [ @@ -445,27 +452,29 @@ "name": "stdout", "output_type": "stream", "text": [ - "No Standardization 0.5379\n", - "With Standardization 0.5379\n" + "No Standardization 0.6389\n", + "With Standardization 0.6389\n" ] } ], "source": [ "inference_pipe = make_pipeline(standardizer, random_search.best_estimator_)\n", "\n", - "print(f'No Standardization {random_search.best_estimator_.score(mol_list_test, y_test):0.4F}')\n", - "print(f'With Standardization {inference_pipe.score(mol_list_test, y_test):0.4F}')" + "print(\n", + " f\"No Standardization {random_search.best_estimator_.score(mol_list_test, y_test):0.4F}\"\n", + ")\n", + "print(f\"With Standardization {inference_pipe.score(mol_list_test, y_test):0.4F}\")" ] }, { "cell_type": "markdown", - "id": "e00bae88", + "id": "2d31c059", "metadata": { "lines_to_next_cell": 0, "title": "Building an inference pipeline, it appears our test-data was pretty standard" }, "source": [ - "We see that the dataset already appeared to be in forms that are similar to the ones coming from the standardization. 
\n", + "We see that the dataset already appeared to be in forms that are similar to the ones coming from the standardization.\n", "\n", "Interestingly the test-set performance often seem to be better than the CV performance during the hyperparameter search. This may be due to the model being refit at the end of the search to the whole training dataset, as the refit parameter on the randomized_search object by default is true. The final model is thus fitted on more data than the individual models during training.\n", "\n", @@ -475,13 +484,13 @@ { "cell_type": "code", "execution_count": 14, - "id": "f6426b23", + "id": "92105568", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:27.320951Z", - "iopub.status.busy": "2024-04-12T12:10:27.319924Z", - "iopub.status.idle": "2024-04-12T12:10:27.337918Z", - "shell.execute_reply": "2024-04-12T12:10:27.337344Z" + "iopub.execute_input": "2024-11-24T09:27:36.397490Z", + "iopub.status.busy": "2024-11-24T09:27:36.397082Z", + "iopub.status.idle": "2024-11-24T09:27:36.411965Z", + "shell.execute_reply": "2024-11-24T09:27:36.411400Z" } }, "outputs": [ @@ -489,23 +498,31 @@ "name": "stdout", "output_type": "stream", "text": [ - "Predictions with no standardization: [5.95334818 5.99768591 5.99768591 6.31509408 6.09785566]\n", - "Predictions with standardization: [5.95334818 5.95334818 5.95334818 5.95334818 5.95334818]\n" + "Predictions with no standardization: [5.89126045 5.97721234 5.97721234 6.03427056 6.03951076]\n", + "Predictions with standardization: [5.89126045 5.89126045 5.89126045 5.89126045 5.89126045]\n" ] } ], "source": [ "# Intergrating the Standardizer and challenge it with some different forms and salts of benzoic acid\n", - "smiles_list = ['c1ccccc1C(=O)[OH]', 'c1ccccc1C(=O)[O-]', 'c1ccccc1C(=O)[O-].[Na+]', 'c1ccccc1C(=O)[O][Na]', 'c1ccccc1C(=O)[O-].C[N+](C)C']\n", + "smiles_list = [\n", + " \"c1ccccc1C(=O)[OH]\",\n", + " \"c1ccccc1C(=O)[O-]\",\n", + " \"c1ccccc1C(=O)[O-].[Na+]\",\n", + " 
\"c1ccccc1C(=O)[O][Na]\",\n", + " \"c1ccccc1C(=O)[O-].C[N+](C)C\",\n", + "]\n", "mols_list = [Chem.MolFromSmiles(smiles) for smiles in smiles_list]\n", "\n", - "print(f'Predictions with no standardization: {random_search.best_estimator_.predict(mols_list)}')\n", - "print(f'Predictions with standardization: {inference_pipe.predict(mols_list)}')" + "print(\n", + " f\"Predictions with no standardization: {random_search.best_estimator_.predict(mols_list)}\"\n", + ")\n", + "print(f\"Predictions with standardization: {inference_pipe.predict(mols_list)}\")" ] }, { "cell_type": "markdown", - "id": "47345289", + "id": "9d196197", "metadata": {}, "source": [ "Without standardization we get variation in the predictions, but with the standardization object in place, we get the same results. If you want a model that gives different predictions for the different forms, either the standardization need to be removed or the settings changed.\n", @@ -515,7 +532,7 @@ }, { "cell_type": "markdown", - "id": "8e0a8f49", + "id": "824ebc99", "metadata": {}, "source": [] } diff --git a/notebooks/06_hyperparameter_tuning.py b/notebooks/06_hyperparameter_tuning.py index 0747bd2..ba59814 100644 --- a/notebooks/06_hyperparameter_tuning.py +++ b/notebooks/06_hyperparameter_tuning.py @@ -44,10 +44,11 @@ csv_file = "SLC6A4_active_excape_export.csv" if not os.path.exists(csv_file): import urllib.request + url = "https://ndownloader.figshare.com/files/25747817" urllib.request.urlretrieve(url, csv_file) else: - csv_file = '../tests/data/SLC6A4_active_excapedb_subset.csv' + csv_file = "../tests/data/SLC6A4_active_excapedb_subset.csv" # %% [markdown] # The CSV data is loaded into a Pandas dataframe and the PandasTools utility from RDKit is used to add a column with RDKit molecules @@ -64,14 +65,16 @@ # %% -mol_list_train, mol_list_test, y_train, y_test = train_test_split(data.ROMol, data.pXC50, random_state=42) +mol_list_train, mol_list_test, y_train, y_test = train_test_split( + data.ROMol, 
data.pXC50, random_state=42 +) # %% [markdown] # We will standardize the molecules before modelling. This is best done before the hyperparameter optimizatiion of the featurization with the scikit-mol transformer and regression modelling, as the standardization is otherwise done for every loop in the hyperparameter optimization, which will make it take longer time. # %% -# Probably the recommended way would be to prestandardize the data if there's no changes to the transformer, +# Probably the recommended way would be to prestandardize the data if there's no changes to the transformer, # and then add the standardizer in the inference pipeline. from scikit_mol.standardizer import Standardizer @@ -90,13 +93,13 @@ optimization_pipe = make_pipeline(moltransformer, regressor) - # %% [markdown] # For hyperparameter optimization we import the RandomizedSearchCV class from Scikit-Learn. It will try different random combinations of settings and use internal cross-validation to find the best model. In the end, it will fit the best found parameters on the full set. We also import loguniform, to get a better sampling of some of the parameters. 
# %% Now hyperparameter tuning from sklearn.model_selection import RandomizedSearchCV -#from sklearn.utils.fixes import loguniform + +# from sklearn.utils.fixes import loguniform from scipy.stats import loguniform # %% [markdown] @@ -111,15 +114,18 @@ # %% -param_dist = {'ridge__alpha': loguniform(1e-2, 1e3), - "morganfingerprinttransformer__nBits": [256,512,1024,2048,4096], - 'morganfingerprinttransformer__radius':[1,2,3,4], - 'morganfingerprinttransformer__useCounts': [True,False], - 'morganfingerprinttransformer__useFeatures':[True,False]} +param_dist = { + "ridge__alpha": loguniform(1e-2, 1e3), + "morganfingerprinttransformer__fpSize": [256, 512, 1024, 2048, 4096], + "morganfingerprinttransformer__radius": [1, 2, 3, 4], + "morganfingerprinttransformer__useCounts": [True, False], + "morganfingerprinttransformer__useFeatures": [True, False], +} # %% [markdown] # The report function was taken from [this example](https://scikit-learn.org/stable/auto_examples/model_selection/plot_randomized_search.html#sphx-glr-auto-examples-model-selection-plot-randomized-search-py) from the scikit learn documentation. 
+ # %% From https://scikit-learn.org/stable/auto_examples/model_selection/plot_randomized_search.html#sphx-glr-auto-examples-model-selection-plot-randomized-search-py # Utility function to report best scores def report(results, n_top=3): @@ -149,7 +155,7 @@ def report(results, n_top=3): random_search.fit(mol_list_std_train, y_train.values) t1 = time() -print(f'Runtime: {t1-t0:0.2F} for {n_iter_search} iterations)') +print(f"Runtime: {t1-t0:0.2F} for {n_iter_search} iterations)") # %% report(random_search.cv_results_) @@ -162,22 +168,32 @@ def report(results, n_top=3): # %% inference_pipe = make_pipeline(standardizer, random_search.best_estimator_) -print(f'No Standardization {random_search.best_estimator_.score(mol_list_test, y_test):0.4F}') -print(f'With Standardization {inference_pipe.score(mol_list_test, y_test):0.4F}') +print( + f"No Standardization {random_search.best_estimator_.score(mol_list_test, y_test):0.4F}" +) +print(f"With Standardization {inference_pipe.score(mol_list_test, y_test):0.4F}") # %% Building an inference pipeline, it appears our test-data was pretty standard [markdown] -# We see that the dataset already appeared to be in forms that are similar to the ones coming from the standardization. +# We see that the dataset already appeared to be in forms that are similar to the ones coming from the standardization. # # Interestingly the test-set performance often seem to be better than the CV performance during the hyperparameter search. This may be due to the model being refit at the end of the search to the whole training dataset, as the refit parameter on the randomized_search object by default is true. The final model is thus fitted on more data than the individual models during training. # # To demonstrate the effect of standartization we can see the difference if we challenge the predictor with different forms of benzoic acid and benzoates. 
# %% # Intergrating the Standardizer and challenge it with some different forms and salts of benzoic acid -smiles_list = ['c1ccccc1C(=O)[OH]', 'c1ccccc1C(=O)[O-]', 'c1ccccc1C(=O)[O-].[Na+]', 'c1ccccc1C(=O)[O][Na]', 'c1ccccc1C(=O)[O-].C[N+](C)C'] +smiles_list = [ + "c1ccccc1C(=O)[OH]", + "c1ccccc1C(=O)[O-]", + "c1ccccc1C(=O)[O-].[Na+]", + "c1ccccc1C(=O)[O][Na]", + "c1ccccc1C(=O)[O-].C[N+](C)C", +] mols_list = [Chem.MolFromSmiles(smiles) for smiles in smiles_list] -print(f'Predictions with no standardization: {random_search.best_estimator_.predict(mols_list)}') -print(f'Predictions with standardization: {inference_pipe.predict(mols_list)}') +print( + f"Predictions with no standardization: {random_search.best_estimator_.predict(mols_list)}" +) +print(f"Predictions with standardization: {inference_pipe.predict(mols_list)}") # %% [markdown] # Without standardization we get variation in the predictions, but with the standardization object in place, we get the same results. If you want a model that gives different predictions for the different forms, either the standardization need to be removed or the settings changed. 
diff --git a/notebooks/07_parallel_transforms.ipynb b/notebooks/07_parallel_transforms.ipynb index 36c4ac0..9c111f8 100644 --- a/notebooks/07_parallel_transforms.ipynb +++ b/notebooks/07_parallel_transforms.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "3c3e2734", + "id": "87ed8373", "metadata": {}, "source": [ "# Parallel calculations of transforms\n", @@ -15,13 +15,13 @@ { "cell_type": "code", "execution_count": 1, - "id": "d34a6f7e", + "id": "dac6956a", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:28.855965Z", - "iopub.status.busy": "2024-04-12T12:10:28.855774Z", - "iopub.status.idle": "2024-04-12T12:10:29.593405Z", - "shell.execute_reply": "2024-04-12T12:10:29.592709Z" + "iopub.execute_input": "2024-11-24T09:27:38.302600Z", + "iopub.status.busy": "2024-11-24T09:27:38.302116Z", + "iopub.status.idle": "2024-11-24T09:27:39.171522Z", + "shell.execute_reply": "2024-11-24T09:27:39.170882Z" } }, "outputs": [], @@ -38,7 +38,7 @@ }, { "cell_type": "markdown", - "id": "f73bfd41", + "id": "7c2a81f2", "metadata": {}, "source": [ "## Obtaining the Data\n", @@ -51,13 +51,13 @@ { "cell_type": "code", "execution_count": 2, - "id": "f59b0883", + "id": "f64c418f", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:29.596321Z", - "iopub.status.busy": "2024-04-12T12:10:29.596022Z", - "iopub.status.idle": "2024-04-12T12:10:29.600548Z", - "shell.execute_reply": "2024-04-12T12:10:29.599990Z" + "iopub.execute_input": "2024-11-24T09:27:39.174368Z", + "iopub.status.busy": "2024-11-24T09:27:39.174075Z", + "iopub.status.idle": "2024-11-24T09:27:39.177863Z", + "shell.execute_reply": "2024-11-24T09:27:39.177305Z" } }, "outputs": [], @@ -77,13 +77,13 @@ { "cell_type": "code", "execution_count": 3, - "id": "9cb3cb5c", + "id": "0eabd800", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:29.602922Z", - "iopub.status.busy": "2024-04-12T12:10:29.602713Z", - "iopub.status.idle": 
"2024-04-12T12:10:29.643748Z", - "shell.execute_reply": "2024-04-12T12:10:29.643141Z" + "iopub.execute_input": "2024-11-24T09:27:39.180191Z", + "iopub.status.busy": "2024-11-24T09:27:39.179937Z", + "iopub.status.idle": "2024-11-24T09:27:39.221096Z", + "shell.execute_reply": "2024-11-24T09:27:39.220386Z" } }, "outputs": [ @@ -105,7 +105,7 @@ }, { "cell_type": "markdown", - "id": "cbdda331", + "id": "4144946e", "metadata": {}, "source": [ "## Evaluating the Impact of Parallelism on Transformations\n", @@ -116,13 +116,13 @@ { "cell_type": "code", "execution_count": 4, - "id": "45c042f0", + "id": "a7f66af7", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:29.646541Z", - "iopub.status.busy": "2024-04-12T12:10:29.646035Z", - "iopub.status.idle": "2024-04-12T12:10:29.651144Z", - "shell.execute_reply": "2024-04-12T12:10:29.650588Z" + "iopub.execute_input": "2024-11-24T09:27:39.223702Z", + "iopub.status.busy": "2024-11-24T09:27:39.223459Z", + "iopub.status.idle": "2024-11-24T09:27:39.228461Z", + "shell.execute_reply": "2024-11-24T09:27:39.227977Z" }, "title": "A demonstration of the speedup that can be had for the descriptor transformer" }, @@ -134,21 +134,687 @@ { "cell_type": "code", "execution_count": 5, - "id": "8f158499", + "id": "a03bc824", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:29.653455Z", - "iopub.status.busy": "2024-04-12T12:10:29.653236Z", - "iopub.status.idle": "2024-04-12T12:10:31.579596Z", - "shell.execute_reply": "2024-04-12T12:10:31.578985Z" + "iopub.execute_input": "2024-11-24T09:27:39.230911Z", + "iopub.status.busy": "2024-11-24T09:27:39.230692Z", + "iopub.status.idle": "2024-11-24T09:27:41.368180Z", + "shell.execute_reply": "2024-11-24T09:27:41.367438Z" } }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: 
please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", 
+ "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] 
DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION 
WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: 
please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", 
+ "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] 
DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION 
WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: 
please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", 
+ "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] 
DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:40] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION 
WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: 
please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "Calculation time on dataset of size 200 with parallel=False:\t1.92 seconds\n" + "Calculation time on dataset of size 200 with parallel=False:\t2.13 seconds\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION 
WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" ] } ], @@ -163,7 +829,7 @@ }, { "cell_type": "markdown", - "id": "08b2cb6e", + "id": "d304d675", "metadata": {}, "source": [ "\n", @@ -173,13 +839,13 @@ { "cell_type": "code", "execution_count": 6, - "id": "f1b48596", + "id": "c80388e6", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:31.582218Z", - "iopub.status.busy": "2024-04-12T12:10:31.581970Z", - "iopub.status.idle": "2024-04-12T12:10:32.110173Z", - "shell.execute_reply": "2024-04-12T12:10:32.109459Z" + "iopub.execute_input": "2024-11-24T09:27:41.370886Z", + "iopub.status.busy": "2024-11-24T09:27:41.370638Z", + "iopub.status.idle": "2024-11-24T09:27:42.384085Z", + "shell.execute_reply": "2024-11-24T09:27:42.383188Z" } }, "outputs": [ @@ -187,15 +853,4215 @@ "name": "stderr", "output_type": "stream", "text": [ - "/home/esben/python_envs/vscode/lib/python3.10/site-packages/numpy/core/fromnumeric.py:59: FutureWarning: 'Series.swapaxes' is deprecated and will be removed in a future version. 
Please use 'Series.transpose' instead.\n", - " return bound(*args, **kwds)\n" + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + 
"[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + 
"name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] 
DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/esben/python_envs/vscode/lib/python3.10/site-packages/numpy/core/fromnumeric.py:59: FutureWarning: 'Series.swapaxes' is deprecated and will be removed in a future version. 
Please use 'Series.transpose' instead.\n", + " return bound(*args, **kwds)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] 
+ }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] 
DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION 
WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please 
use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use 
MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + 
"text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + 
}, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] 
DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION 
WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please 
use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use 
MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + 
"text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + 
}, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] 
DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:41] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION 
WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please 
use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use 
MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + 
"text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + 
}, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] 
DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION 
WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + 
"output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please 
use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use 
MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + 
"text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:27:42] DEPRECATION WARNING: please use MorganGenerator\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Calculation time on dataset of size 200 with parallel=True:\t0.52 seconds\n" + "Calculation time on dataset of size 200 with parallel=True:\t1.01 seconds\n" ] } ], @@ -207,7 +5073,7 @@ }, { "cell_type": "markdown", - "id": "267de7b7", + "id": "731bd13a", "metadata": {}, "source": [ "We've seen that parallelism can help speed up our transformations, with the degree of speedup depending on the number of CPU cores available. However, it's worth noting that there may be some overhead associated with the process of splitting the dataset, pickling objects and functions, and passing them to the parallel child processes. 
As a result, it may not always be worthwhile to use parallelism, particularly for smaller datasets or certain types of fingerprints.\n", @@ -220,13 +5086,13 @@ { "cell_type": "code", "execution_count": 7, - "id": "f8b61e6c", + "id": "ef6d2b0c", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:32.113041Z", - "iopub.status.busy": "2024-04-12T12:10:32.112800Z", - "iopub.status.idle": "2024-04-12T12:10:32.220711Z", - "shell.execute_reply": "2024-04-12T12:10:32.219973Z" + "iopub.execute_input": "2024-11-24T09:27:42.387160Z", + "iopub.status.busy": "2024-11-24T09:27:42.386886Z", + "iopub.status.idle": "2024-11-24T09:27:42.484867Z", + "shell.execute_reply": "2024-11-24T09:27:42.484222Z" }, "lines_to_next_cell": 2, "title": "Some of the benchmarking plots" @@ -256,7 +5122,7 @@ }, { "cell_type": "markdown", - "id": "9c29a58f", + "id": "2aac85b1", "metadata": {}, "source": [ "Interestingly, we observed that parallelism actually took longer to calculate the fingerprints in some cases, which is a perfect illustration of the overhead issue associated with parallelism. Generally, the faster the fingerprint calculation in itself, the larger the dataset needs to be for parallelism to be worthwhile. 
For example, the Descriptor transformer, which is one of the slowest, can benefit even for smaller datasets, while for faster fingerprint types like Morgan, Atompairs, and Topological Torsion fingerprints, the dataset needs to be larger.\n", diff --git a/notebooks/08_external_library_skopt.ipynb b/notebooks/08_external_library_skopt.ipynb index de50fee..8d8121f 100644 --- a/notebooks/08_external_library_skopt.ipynb +++ b/notebooks/08_external_library_skopt.ipynb @@ -3,8 +3,14 @@ { "cell_type": "code", "execution_count": 1, - "id": "7111cd27", + "id": "c0f4155f", "metadata": { + "execution": { + "iopub.execute_input": "2024-11-24T09:27:44.212745Z", + "iopub.status.busy": "2024-11-24T09:27:44.212299Z", + "iopub.status.idle": "2024-11-24T09:27:47.165987Z", + "shell.execute_reply": "2024-11-24T09:27:47.162995Z" + }, "title": "Needs scikit-optimize" }, "outputs": [ @@ -12,15 +18,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: scikit-optimize in /home/esben/python_envs/vscode/lib/python3.10/site-packages (0.10.1)\n", - "Requirement already satisfied: packaging>=21.3 in /home/esben/python_envs/vscode/lib/python3.10/site-packages (from scikit-optimize) (23.2)\n", - "Requirement already satisfied: pyaml>=16.9 in /home/esben/python_envs/vscode/lib/python3.10/site-packages (from scikit-optimize) (23.12.0)\n", - "Requirement already satisfied: joblib>=0.11 in /home/esben/python_envs/vscode/lib/python3.10/site-packages (from scikit-optimize) (1.3.2)\n", - "Requirement already satisfied: scikit-learn>=1.0.0 in /home/esben/python_envs/vscode/lib/python3.10/site-packages (from scikit-optimize) (1.3.1)\n", - "Requirement already satisfied: scipy>=1.1.0 in /home/esben/python_envs/vscode/lib/python3.10/site-packages (from scikit-optimize) (1.11.3)\n", - "Requirement already satisfied: numpy>=1.20.3 in /home/esben/python_envs/vscode/lib/python3.10/site-packages (from scikit-optimize) (1.26.0)\n", - "Requirement already satisfied: PyYAML in 
/home/esben/python_envs/vscode/lib/python3.10/site-packages (from pyaml>=16.9->scikit-optimize) (6.0.1)\n", - "Requirement already satisfied: threadpoolctl>=2.0.0 in /home/esben/python_envs/vscode/lib/python3.10/site-packages (from scikit-learn>=1.0.0->scikit-optimize) (3.2.0)\n" + "Requirement already satisfied: scikit-optimize in /home/esben/python_envs/vscode/lib/python3.10/site-packages (0.10.2)\r\n", + "Requirement already satisfied: scikit-learn>=1.0.0 in /home/esben/python_envs/vscode/lib/python3.10/site-packages (from scikit-optimize) (1.5.2)\r\n", + "Requirement already satisfied: joblib>=0.11 in /home/esben/python_envs/vscode/lib/python3.10/site-packages (from scikit-optimize) (1.3.2)\r\n", + "Requirement already satisfied: scipy>=1.1.0 in /home/esben/python_envs/vscode/lib/python3.10/site-packages (from scikit-optimize) (1.11.3)\r\n", + "Requirement already satisfied: packaging>=21.3 in /home/esben/python_envs/vscode/lib/python3.10/site-packages (from scikit-optimize) (23.2)\r\n", + "Requirement already satisfied: pyaml>=16.9 in /home/esben/python_envs/vscode/lib/python3.10/site-packages (from scikit-optimize) (23.12.0)\r\n", + "Requirement already satisfied: numpy>=1.20.3 in /home/esben/python_envs/vscode/lib/python3.10/site-packages (from scikit-optimize) (1.26.4)\r\n", + "Requirement already satisfied: PyYAML in /home/esben/python_envs/vscode/lib/python3.10/site-packages (from pyaml>=16.9->scikit-optimize) (6.0.1)\r\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: threadpoolctl>=3.1.0 in /home/esben/python_envs/vscode/lib/python3.10/site-packages (from scikit-learn>=1.0.0->scikit-optimize) (3.2.0)\r\n" ] } ], @@ -31,9 +43,14 @@ { "cell_type": "code", "execution_count": 2, - "id": "5648edb8", + "id": "49f80040", "metadata": { - "lines_to_next_cell": 0 + "execution": { + "iopub.execute_input": "2024-11-24T09:27:47.174431Z", + "iopub.status.busy": "2024-11-24T09:27:47.173616Z", + 
"iopub.status.idle": "2024-11-24T09:27:47.507299Z", + "shell.execute_reply": "2024-11-24T09:27:47.506634Z" + } }, "outputs": [], "source": [ @@ -45,8 +62,15 @@ { "cell_type": "code", "execution_count": 3, - "id": "6825d2cd", - "metadata": {}, + "id": "f1268213", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-24T09:27:47.510217Z", + "iopub.status.busy": "2024-11-24T09:27:47.509924Z", + "iopub.status.idle": "2024-11-24T09:27:48.065273Z", + "shell.execute_reply": "2024-11-24T09:27:48.064596Z" + } + }, "outputs": [], "source": [ "from sklearn.linear_model import Ridge\n", @@ -61,9 +85,14 @@ { "cell_type": "code", "execution_count": 4, - "id": "12f97eb7", + "id": "7239cf27", "metadata": { - "lines_to_next_cell": 0 + "execution": { + "iopub.execute_input": "2024-11-24T09:27:48.068114Z", + "iopub.status.busy": "2024-11-24T09:27:48.067809Z", + "iopub.status.idle": "2024-11-24T09:27:48.129879Z", + "shell.execute_reply": "2024-11-24T09:27:48.129159Z" + } }, "outputs": [], "source": [ @@ -76,8 +105,15 @@ { "cell_type": "code", "execution_count": 5, - "id": "3e9f36e9", - "metadata": {}, + "id": "aabbba9d", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-24T09:27:48.132744Z", + "iopub.status.busy": "2024-11-24T09:27:48.132424Z", + "iopub.status.idle": "2024-11-24T09:27:48.173318Z", + "shell.execute_reply": "2024-11-24T09:27:48.172690Z" + } + }, "outputs": [], "source": [ "full_set = False\n", @@ -86,32 +122,443 @@ " csv_file = \"SLC6A4_active_excape_export.csv\"\n", " if not os.path.exists(csv_file):\n", " import urllib.request\n", + "\n", " url = \"https://ndownloader.figshare.com/files/25747817\"\n", " urllib.request.urlretrieve(url, csv_file)\n", "else:\n", - " csv_file = '../tests/data/SLC6A4_active_excapedb_subset.csv'\n", + " csv_file = \"../tests/data/SLC6A4_active_excapedb_subset.csv\"\n", "\n", "data = pd.read_csv(csv_file)\n", "trf = SmilesToMolTransformer()\n", - "data['ROMol'] = trf.transform(data.SMILES.values).flatten()" + 
"data[\"ROMol\"] = trf.transform(data.SMILES.values).flatten()" ] }, { "cell_type": "code", "execution_count": 6, - "id": "67d54492", + "id": "488a3e82", "metadata": { + "execution": { + "iopub.execute_input": "2024-11-24T09:27:48.176242Z", + "iopub.status.busy": "2024-11-24T09:27:48.175854Z", + "iopub.status.idle": "2024-11-24T09:27:48.188154Z", + "shell.execute_reply": "2024-11-24T09:27:48.187463Z" + }, "lines_to_next_cell": 0 }, "outputs": [ { "data": { "text/html": [ - "
Pipeline(steps=[('morganfingerprinttransformer',\n",
+       "
Pipeline(steps=[('morganfingerprinttransformer',\n",
        "                 MorganFingerprintTransformer()),\n",
-       "                ('ridge', Ridge())])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
MorganFingerprintTransformer()
" ], "text/plain": [ "Pipeline(steps=[('morganfingerprinttransformer',\n", @@ -132,14 +579,21 @@ { "cell_type": "code", "execution_count": 7, - "id": "e3ce7fc0", - "metadata": {}, + "id": "44811b8e", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-24T09:27:48.190932Z", + "iopub.status.busy": "2024-11-24T09:27:48.190654Z", + "iopub.status.idle": "2024-11-24T09:27:48.195099Z", + "shell.execute_reply": "2024-11-24T09:27:48.194411Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'memory': None, 'steps': [('morganfingerprinttransformer', MorganFingerprintTransformer()), ('ridge', Ridge())], 'verbose': False, 'morganfingerprinttransformer': MorganFingerprintTransformer(), 'ridge': Ridge(), 'morganfingerprinttransformer__nBits': 2048, 'morganfingerprinttransformer__parallel': False, 'morganfingerprinttransformer__radius': 2, 'morganfingerprinttransformer__useBondTypes': True, 'morganfingerprinttransformer__useChirality': False, 'morganfingerprinttransformer__useCounts': False, 'morganfingerprinttransformer__useFeatures': False, 'ridge__alpha': 1.0, 'ridge__copy_X': True, 'ridge__fit_intercept': True, 'ridge__max_iter': None, 'ridge__positive': False, 'ridge__random_state': None, 'ridge__solver': 'auto', 'ridge__tol': 0.0001}\n" + "{'memory': None, 'steps': [('morganfingerprinttransformer', MorganFingerprintTransformer()), ('ridge', Ridge())], 'verbose': False, 'morganfingerprinttransformer': MorganFingerprintTransformer(), 'ridge': Ridge(), 'morganfingerprinttransformer__fpSize': 2048, 'morganfingerprinttransformer__parallel': False, 'morganfingerprinttransformer__radius': 2, 'morganfingerprinttransformer__safe_inference_mode': False, 'morganfingerprinttransformer__useBondTypes': True, 'morganfingerprinttransformer__useChirality': False, 'morganfingerprinttransformer__useCounts': False, 'morganfingerprinttransformer__useFeatures': False, 'ridge__alpha': 1.0, 'ridge__copy_X': True, 'ridge__fit_intercept': True, 
'ridge__max_iter': None, 'ridge__positive': False, 'ridge__random_state': None, 'ridge__solver': 'auto', 'ridge__tol': 0.0001}\n" ] } ], @@ -150,23 +604,28 @@ { "cell_type": "code", "execution_count": 8, - "id": "0a155dd3", + "id": "49eb7dbe", "metadata": { - "lines_to_next_cell": 0 + "execution": { + "iopub.execute_input": "2024-11-24T09:27:48.197795Z", + "iopub.status.busy": "2024-11-24T09:27:48.197479Z", + "iopub.status.idle": "2024-11-24T09:27:48.205564Z", + "shell.execute_reply": "2024-11-24T09:27:48.205022Z" + } }, "outputs": [], "source": [ "max_bits = 4096\n", "\n", "morgan_space = [\n", - " Categorical([True, False], name='morganfingerprinttransformer__useCounts'),\n", - " Categorical([True, False], name='morganfingerprinttransformer__useFeatures'),\n", - " Integer(512,max_bits, name='morganfingerprinttransformer__nBits'),\n", - " Integer(1,3, name='morganfingerprinttransformer__radius')\n", + " Categorical([True, False], name=\"morganfingerprinttransformer__useCounts\"),\n", + " Categorical([True, False], name=\"morganfingerprinttransformer__useFeatures\"),\n", + " Integer(512, max_bits, name=\"morganfingerprinttransformer__fpSize\"),\n", + " Integer(1, 3, name=\"morganfingerprinttransformer__radius\"),\n", "]\n", "\n", "\n", - "regressor_space = [Real(1e-2, 1e3, \"log-uniform\", name='ridge__alpha')]\n", + "regressor_space = [Real(1e-2, 1e3, \"log-uniform\", name=\"ridge__alpha\")]\n", "\n", "search_space = morgan_space + regressor_space" ] @@ -174,9 +633,14 @@ { "cell_type": "code", "execution_count": 9, - "id": "abb750a3", + "id": "3818beb2", "metadata": { - "lines_to_next_cell": 0 + "execution": { + "iopub.execute_input": "2024-11-24T09:27:48.207944Z", + "iopub.status.busy": "2024-11-24T09:27:48.207727Z", + "iopub.status.idle": "2024-11-24T09:27:48.211453Z", + "shell.execute_reply": "2024-11-24T09:27:48.210957Z" + } }, "outputs": [], "source": [ @@ -186,15 +650,29 @@ " print(f\"{key}:{value} - {type(value)}\")\n", " pipe.set_params(**params)\n", "\n", 
- " return -np.mean(cross_val_score(pipe, data.ROMol, data.pXC50, cv=2, n_jobs=-1,\n", - " scoring=\"neg_mean_absolute_error\"))" + " return -np.mean(\n", + " cross_val_score(\n", + " pipe,\n", + " data.ROMol,\n", + " data.pXC50,\n", + " cv=2,\n", + " n_jobs=-1,\n", + " scoring=\"neg_mean_absolute_error\",\n", + " )\n", + " )" ] }, { "cell_type": "code", "execution_count": 10, - "id": "8b6a546c", + "id": "aa6b3af8", "metadata": { + "execution": { + "iopub.execute_input": "2024-11-24T09:27:48.213752Z", + "iopub.status.busy": "2024-11-24T09:27:48.213531Z", + "iopub.status.idle": "2024-11-24T09:27:50.948132Z", + "shell.execute_reply": "2024-11-24T09:27:50.947483Z" + }, "lines_to_next_cell": 0, "title": "THIS takes forever on my machine with a GradientBoostingRegressor" }, @@ -205,7 +683,7 @@ "text": [ "morganfingerprinttransformer__useCounts:False - \n", "morganfingerprinttransformer__useFeatures:False - \n", - "morganfingerprinttransformer__nBits:3587 - \n", + "morganfingerprinttransformer__fpSize:3587 - \n", "morganfingerprinttransformer__radius:3 - \n", "ridge__alpha:13.116515715358098 - \n" ] @@ -216,47 +694,65 @@ "text": [ "morganfingerprinttransformer__useCounts:True - \n", "morganfingerprinttransformer__useFeatures:True - \n", - "morganfingerprinttransformer__nBits:715 - \n", + "morganfingerprinttransformer__fpSize:715 - \n", "morganfingerprinttransformer__radius:2 - \n", - "ridge__alpha:2.445263057083992 - \n", + "ridge__alpha:2.445263057083992 - \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "morganfingerprinttransformer__useCounts:False - \n", "morganfingerprinttransformer__useFeatures:True - \n", - "morganfingerprinttransformer__nBits:1920 - \n", + "morganfingerprinttransformer__fpSize:1920 - \n", "morganfingerprinttransformer__radius:3 - \n", "ridge__alpha:0.48638570461894715 - \n", "morganfingerprinttransformer__useCounts:False - \n", "morganfingerprinttransformer__useFeatures:True - \n", - 
"morganfingerprinttransformer__nBits:3942 - \n", + "morganfingerprinttransformer__fpSize:3942 - \n", "morganfingerprinttransformer__radius:1 - \n", "ridge__alpha:224.09712855921126 - \n", "morganfingerprinttransformer__useCounts:True - \n", "morganfingerprinttransformer__useFeatures:False - \n", - "morganfingerprinttransformer__nBits:2377 - \n", + "morganfingerprinttransformer__fpSize:2377 - \n", "morganfingerprinttransformer__radius:2 - \n", - "ridge__alpha:40.10174523739503 - \n", + "ridge__alpha:40.10174523739503 - \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "morganfingerprinttransformer__useCounts:False - \n", "morganfingerprinttransformer__useFeatures:False - \n", - "morganfingerprinttransformer__nBits:3231 - \n", + "morganfingerprinttransformer__fpSize:3231 - \n", "morganfingerprinttransformer__radius:1 - \n", "ridge__alpha:2.333469328026273 - \n", "morganfingerprinttransformer__useCounts:True - \n", "morganfingerprinttransformer__useFeatures:False - \n", - "morganfingerprinttransformer__nBits:1288 - \n", + "morganfingerprinttransformer__fpSize:1288 - \n", "morganfingerprinttransformer__radius:1 - \n", "ridge__alpha:0.41754668393896904 - \n", "morganfingerprinttransformer__useCounts:True - \n", "morganfingerprinttransformer__useFeatures:True - \n", - "morganfingerprinttransformer__nBits:1897 - \n", + "morganfingerprinttransformer__fpSize:1897 - \n", "morganfingerprinttransformer__radius:3 - \n", "ridge__alpha:1.777255838269662 - \n", "morganfingerprinttransformer__useCounts:False - \n", "morganfingerprinttransformer__useFeatures:False - \n", - "morganfingerprinttransformer__nBits:868 - \n", + "morganfingerprinttransformer__fpSize:868 - \n", "morganfingerprinttransformer__radius:3 - \n", - "ridge__alpha:18.43742127649598 - \n", + "ridge__alpha:18.43742127649598 - \n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "morganfingerprinttransformer__useCounts:True - \n", 
"morganfingerprinttransformer__useFeatures:True - \n", - "morganfingerprinttransformer__nBits:3202 - \n", + "morganfingerprinttransformer__fpSize:3202 - \n", "morganfingerprinttransformer__radius:2 - \n", "ridge__alpha:0.4219258607446576 - \n" ] @@ -281,8 +777,14 @@ { "cell_type": "code", "execution_count": 11, - "id": "243f5309", + "id": "42d8fc82", "metadata": { + "execution": { + "iopub.execute_input": "2024-11-24T09:27:50.951138Z", + "iopub.status.busy": "2024-11-24T09:27:50.950922Z", + "iopub.status.idle": "2024-11-24T09:27:50.956593Z", + "shell.execute_reply": "2024-11-24T09:27:50.956046Z" + }, "lines_to_next_cell": 0 }, "outputs": [ @@ -291,20 +793,27 @@ "output_type": "stream", "text": [ "Best parameters:\n", - "{'morganfingerprinttransformer__useCounts': False, 'morganfingerprinttransformer__useFeatures': False, 'morganfingerprinttransformer__nBits': 3231, 'morganfingerprinttransformer__radius': 1, 'ridge__alpha': 2.333469328026273}\n" + "{'morganfingerprinttransformer__useCounts': False, 'morganfingerprinttransformer__useFeatures': False, 'morganfingerprinttransformer__fpSize': 3231, 'morganfingerprinttransformer__radius': 1, 'ridge__alpha': 2.333469328026273}\n" ] } ], "source": [ "print(\"\"\"Best parameters:\"\"\")\n", - "print({param.name:value for param,value in zip(pipe_gp.space, pipe_gp.x) })" + "print({param.name: value for param, value in zip(pipe_gp.space, pipe_gp.x)})" ] }, { "cell_type": "code", "execution_count": 12, - "id": "b95b09d1", - "metadata": {}, + "id": "c5e9fda9", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-24T09:27:50.959607Z", + "iopub.status.busy": "2024-11-24T09:27:50.959088Z", + "iopub.status.idle": "2024-11-24T09:27:51.588690Z", + "shell.execute_reply": "2024-11-24T09:27:51.587982Z" + } + }, "outputs": [ { "data": { @@ -329,13 +838,14 @@ ], "source": [ "from skopt.plots import plot_convergence\n", + "\n", "plot_convergence(pipe_gp)" ] }, { "cell_type": "code", "execution_count": null, - "id": "8371d672", 
+ "id": "aebdc43d", "metadata": {}, "outputs": [], "source": [] @@ -344,8 +854,7 @@ "metadata": { "jupytext": { "cell_metadata_filter": "title,-all", - "formats": "ipynb,py:percent", - "main_language": "python" + "formats": "ipynb,py:percent" }, "kernelspec": { "display_name": "vscode", diff --git a/notebooks/08_external_library_skopt.py b/notebooks/08_external_library_skopt.py index 4254251..090983e 100644 --- a/notebooks/08_external_library_skopt.py +++ b/notebooks/08_external_library_skopt.py @@ -21,6 +21,7 @@ import os import numpy as np import pandas as pd + # %% from sklearn.linear_model import Ridge from sklearn.model_selection import cross_val_score @@ -35,6 +36,7 @@ from skopt.utils import use_named_args from skopt import gp_minimize + # %% full_set = False @@ -42,14 +44,15 @@ csv_file = "SLC6A4_active_excape_export.csv" if not os.path.exists(csv_file): import urllib.request + url = "https://ndownloader.figshare.com/files/25747817" urllib.request.urlretrieve(url, csv_file) else: - csv_file = '../tests/data/SLC6A4_active_excapedb_subset.csv' + csv_file = "../tests/data/SLC6A4_active_excapedb_subset.csv" data = pd.read_csv(csv_file) trf = SmilesToMolTransformer() -data['ROMol'] = trf.transform(data.SMILES.values).flatten() +data["ROMol"] = trf.transform(data.SMILES.values).flatten() # %% pipe = make_pipeline(MorganFingerprintTransformer(), Ridge()) @@ -61,16 +64,18 @@ max_bits = 4096 morgan_space = [ - Categorical([True, False], name='morganfingerprinttransformer__useCounts'), - Categorical([True, False], name='morganfingerprinttransformer__useFeatures'), - Integer(512,max_bits, name='morganfingerprinttransformer__nBits'), - Integer(1,3, name='morganfingerprinttransformer__radius') + Categorical([True, False], name="morganfingerprinttransformer__useCounts"), + Categorical([True, False], name="morganfingerprinttransformer__useFeatures"), + Integer(512, max_bits, name="morganfingerprinttransformer__fpSize"), + Integer(1, 3, 
name="morganfingerprinttransformer__radius"), ] -regressor_space = [Real(1e-2, 1e3, "log-uniform", name='ridge__alpha')] +regressor_space = [Real(1e-2, 1e3, "log-uniform", name="ridge__alpha")] search_space = morgan_space + regressor_space + + # %% @use_named_args(search_space) def objective(**params): @@ -78,17 +83,28 @@ def objective(**params): print(f"{key}:{value} - {type(value)}") pipe.set_params(**params) - return -np.mean(cross_val_score(pipe, data.ROMol, data.pXC50, cv=2, n_jobs=-1, - scoring="neg_mean_absolute_error")) + return -np.mean( + cross_val_score( + pipe, + data.ROMol, + data.pXC50, + cv=2, + n_jobs=-1, + scoring="neg_mean_absolute_error", + ) + ) + + # %% THIS takes forever on my machine with a GradientBoostingRegressor pipe_gp = gp_minimize(objective, search_space, n_calls=10, random_state=0) "Best score=%.4f" % pipe_gp.fun # %% print("""Best parameters:""") -print({param.name:value for param,value in zip(pipe_gp.space, pipe_gp.x) }) +print({param.name: value for param, value in zip(pipe_gp.space, pipe_gp.x)}) # %% from skopt.plots import plot_convergence + plot_convergence(pipe_gp) # %% diff --git a/notebooks/09_Combinatorial_Method_Usage_with_FingerPrint_Transformers.ipynb b/notebooks/09_Combinatorial_Method_Usage_with_FingerPrint_Transformers.ipynb index 8e4e2fd..18bc9ec 100644 --- a/notebooks/09_Combinatorial_Method_Usage_with_FingerPrint_Transformers.ipynb +++ b/notebooks/09_Combinatorial_Method_Usage_with_FingerPrint_Transformers.ipynb @@ -2,6 +2,7 @@ "cells": [ { "cell_type": "markdown", + "id": "0e70fca3", "metadata": {}, "source": [ "# Example: Using Multiple Different Fingerprint Transformer\n", @@ -14,7 +15,7 @@ "* Training Phase\n", "* Analysis\n", "\n", - "Authors: @VincentAlexanderScholz, @RiesBen \n", + "Authors: @VincentAlexanderScholz, @RiesBen\n", "\n", "## Imports:\n", "First we will import all the stuff that we will need for our work.\n" @@ -23,12 +24,13 @@ { "cell_type": "code", "execution_count": 1, + "id": "b705b5c9", 
"metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:33.739171Z", - "iopub.status.busy": "2024-04-12T12:10:33.738827Z", - "iopub.status.idle": "2024-04-12T12:10:34.837344Z", - "shell.execute_reply": "2024-04-12T12:10:34.836672Z" + "iopub.execute_input": "2024-11-24T09:27:55.508365Z", + "iopub.status.busy": "2024-11-24T09:27:55.507967Z", + "iopub.status.idle": "2024-11-24T09:27:56.807362Z", + "shell.execute_reply": "2024-11-24T09:27:56.806654Z" }, "lines_to_next_cell": 2 }, @@ -52,6 +54,7 @@ }, { "cell_type": "markdown", + "id": "2a4eb825", "metadata": {}, "source": [ "## Get Data:\n", @@ -64,12 +67,13 @@ { "cell_type": "code", "execution_count": 2, + "id": "34b2618a", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:10:34.840322Z", - "iopub.status.busy": "2024-04-12T12:10:34.839978Z", - "iopub.status.idle": "2024-04-12T12:10:34.881015Z", - "shell.execute_reply": "2024-04-12T12:10:34.880411Z" + "iopub.execute_input": "2024-11-24T09:27:56.810246Z", + "iopub.status.busy": "2024-11-24T09:27:56.809941Z", + "iopub.status.idle": "2024-11-24T09:27:56.851202Z", + "shell.execute_reply": "2024-11-24T09:27:56.850568Z" } }, "outputs": [ @@ -89,12 +93,13 @@ " csv_file = \"SLC6A4_active_excape_export.csv\"\n", " if not os.path.exists(csv_file):\n", " import urllib.request\n", + "\n", " url = \"https://ndownloader.figshare.com/files/25747817\"\n", " urllib.request.urlretrieve(url, csv_file)\n", "else:\n", - " csv_file = '../tests/data/SLC6A4_active_excapedb_subset.csv'\n", + " csv_file = \"../tests/data/SLC6A4_active_excapedb_subset.csv\"\n", "\n", - "#Parse Database\n", + "# Parse Database\n", "data = pd.read_csv(csv_file)\n", "\n", "PandasTools.AddMoleculeColumnToFrame(data, smilesCol=\"SMILES\")\n", @@ -103,12 +108,13 @@ }, { "cell_type": "markdown", + "id": "b8dba759", "metadata": {}, "source": [ "## Build Pipeline:\n", "In this stage we will build the Pipeline consisting of the featurization part (finger print transformers) and the model 
part (Ridge Regression).\n", "\n", - "Note that the featurization in this section is an hyperparameter, living in `param_grid`, and the `\"fp_transformer\"` string is just a placeholder, being replaced during pipeline execution. \n", + "Note that the featurization in this section is an hyperparameter, living in `param_grid`, and the `\"fp_transformer\"` string is just a placeholder, being replaced during pipeline execution.\n", "\n", "This way we can define multiple different scenarios in `param_grid`, that allow us to rapidly explore different combinations of settings and methodologies." ] @@ -116,16 +122,13 @@ { "cell_type": "code", "execution_count": 3, + "id": "e06042cc", "metadata": { - "ExecuteTime": { - "end_time": "2023-09-22T11:29:15.949644Z", - "start_time": "2023-09-22T11:29:15.461010Z" - }, "execution": { - "iopub.execute_input": "2024-04-12T12:10:34.883521Z", - "iopub.status.busy": "2024-04-12T12:10:34.883306Z", - "iopub.status.idle": "2024-04-12T12:10:34.893319Z", - "shell.execute_reply": "2024-04-12T12:10:34.892759Z" + "iopub.execute_input": "2024-11-24T09:27:56.854051Z", + "iopub.status.busy": "2024-11-24T09:27:56.853508Z", + "iopub.status.idle": "2024-11-24T09:27:56.863947Z", + "shell.execute_reply": "2024-11-24T09:27:56.863371Z" } }, "outputs": [ @@ -134,7 +137,7 @@ "text/plain": [ "[{'fp_transformer': [MorganFingerprintTransformer(),\n", " AvalonFingerprintTransformer()],\n", - " 'fp_transformer__nBits': [256, 512, 1024, 2048, 4096],\n", + " 'fp_transformer__fpSize': [256, 512, 1024, 2048, 4096],\n", " 'regressor__alpha': array([0.1 , 0.325, 0.55 , 0.775, 1. 
])},\n", " {'fp_transformer': [RDKitFingerprintTransformer(),\n", " AtomPairFingerprintTransformer(),\n", @@ -150,24 +153,35 @@ "source": [ "\n", "regressor = Ridge()\n", - "optimization_pipe = Pipeline([(\"fp_transformer\", \"fp_transformer\"), # this is a placeholder for different transformers\n", - " (\"regressor\", regressor)])\n", - "\n", - "param_grid = [ # Here pass different Options and Approaches\n", + "optimization_pipe = Pipeline(\n", + " [\n", + " (\n", + " \"fp_transformer\",\n", + " \"fp_transformer\",\n", + " ), # this is a placeholder for different transformers\n", + " (\"regressor\", regressor),\n", + " ]\n", + ")\n", + "\n", + "param_grid = [ # Here pass different Options and Approaches\n", " {\n", - " \"fp_transformer\": [fingerprints.MorganFingerprintTransformer(),\n", - " fingerprints.AvalonFingerprintTransformer()],\n", - " \"fp_transformer__nBits\": [2**x for x in range(8,13)],\n", + " \"fp_transformer\": [\n", + " fingerprints.MorganFingerprintTransformer(),\n", + " fingerprints.AvalonFingerprintTransformer(),\n", + " ],\n", + " \"fp_transformer__fpSize\": [2**x for x in range(8, 13)],\n", " },\n", " {\n", - " \"fp_transformer\": [fingerprints.RDKitFingerprintTransformer(),\n", - " fingerprints.AtomPairFingerprintTransformer(),\n", - " fingerprints.MACCSKeysFingerprintTransformer()], \n", + " \"fp_transformer\": [\n", + " fingerprints.RDKitFingerprintTransformer(),\n", + " fingerprints.AtomPairFingerprintTransformer(),\n", + " fingerprints.MACCSKeysFingerprintTransformer(),\n", + " ],\n", " },\n", "]\n", "\n", "global_options = {\n", - " \"regressor__alpha\": np.linspace(0.1,1,5),\n", + " \"regressor__alpha\": np.linspace(0.1, 1, 5),\n", "}\n", "\n", "[params.update(global_options) for params in param_grid]\n", @@ -177,6 +191,7 @@ }, { "cell_type": "markdown", + "id": "521aa24a", "metadata": {}, "source": [ "## Train Model\n", @@ -186,16 +201,13 @@ { "cell_type": "code", "execution_count": 4, + "id": "f1cf66df", "metadata": { - 
"ExecuteTime": { - "end_time": "2023-09-22T11:29:15.960939Z", - "start_time": "2023-09-22T11:29:15.461078Z" - }, "execution": { - "iopub.execute_input": "2024-04-12T12:10:34.895684Z", - "iopub.status.busy": "2024-04-12T12:10:34.895457Z", - "iopub.status.idle": "2024-04-12T12:11:08.386413Z", - "shell.execute_reply": "2024-04-12T12:11:08.385791Z" + "iopub.execute_input": "2024-11-24T09:27:56.866817Z", + "iopub.status.busy": "2024-11-24T09:27:56.866251Z", + "iopub.status.idle": "2024-11-24T09:28:28.265183Z", + "shell.execute_reply": "2024-11-24T09:28:28.264595Z" } }, "outputs": [ @@ -203,28 +215,30 @@ "name": "stdout", "output_type": "stream", "text": [ - "Runtime: 33.48\n" + "Runtime: 31.39\n" ] } ], "source": [ "# Split Data\n", - "mol_list_train, mol_list_test, y_train, y_test = train_test_split(data.ROMol, data.pXC50, random_state=0)\n", + "mol_list_train, mol_list_test, y_train, y_test = train_test_split(\n", + " data.ROMol, data.pXC50, random_state=0\n", + ")\n", "\n", "# Define Search Process\n", - "grid = GridSearchCV(optimization_pipe, n_jobs=1,\n", - " param_grid=param_grid)\n", + "grid = GridSearchCV(optimization_pipe, n_jobs=1, param_grid=param_grid)\n", "\n", "# Train\n", "t0 = time()\n", "grid.fit(mol_list_train, y_train.values)\n", "t1 = time()\n", "\n", - "print(f'Runtime: {t1-t0:0.2F}')" + "print(f\"Runtime: {t1-t0:0.2F}\")" ] }, { "cell_type": "markdown", + "id": "55aa1549", "metadata": {}, "source": [ "## Analysis\n", @@ -235,12 +249,13 @@ { "cell_type": "code", "execution_count": 5, + "id": "f80006f8", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:08.390298Z", - "iopub.status.busy": "2024-04-12T12:11:08.389259Z", - "iopub.status.idle": "2024-04-12T12:11:08.443953Z", - "shell.execute_reply": "2024-04-12T12:11:08.443334Z" + "iopub.execute_input": "2024-11-24T09:28:28.268630Z", + "iopub.status.busy": "2024-11-24T09:28:28.268131Z", + "iopub.status.idle": "2024-11-24T09:28:28.320208Z", + "shell.execute_reply": 
"2024-11-24T09:28:28.319598Z" } }, "outputs": [ @@ -270,7 +285,7 @@ " mean_score_time\n", " std_score_time\n", " param_fp_transformer\n", - " param_fp_transformer__nBits\n", + " param_fp_transformer__fpSize\n", " param_regressor__alpha\n", " params\n", " split0_test_score\n", @@ -286,13 +301,13 @@ " \n", " \n", " 0\n", - " 0.025447\n", - " 0.002689\n", - " 0.003600\n", - " 0.000743\n", + " 0.011822\n", + " 0.001013\n", + " 0.003596\n", + " 0.001311\n", " MorganFingerprintTransformer()\n", - " 256\n", - " 0.1\n", + " 256.0\n", + " 0.100\n", " {'fp_transformer': MorganFingerprintTransforme...\n", " 0.017975\n", " 0.394682\n", @@ -305,12 +320,12 @@ " \n", " \n", " 1\n", - " 0.025515\n", - " 0.001279\n", - " 0.003084\n", - " 0.000064\n", + " 0.010119\n", + " 0.000152\n", + " 0.002832\n", + " 0.000070\n", " MorganFingerprintTransformer()\n", - " 256\n", + " 256.0\n", " 0.325\n", " {'fp_transformer': MorganFingerprintTransforme...\n", " 0.078758\n", @@ -324,13 +339,13 @@ " \n", " \n", " 2\n", - " 0.026243\n", - " 0.000330\n", - " 0.003101\n", - " 0.000081\n", + " 0.010302\n", + " 0.000429\n", + " 0.003310\n", + " 0.000967\n", " MorganFingerprintTransformer()\n", - " 256\n", - " 0.55\n", + " 256.0\n", + " 0.550\n", " {'fp_transformer': MorganFingerprintTransforme...\n", " 0.128221\n", " 0.490253\n", @@ -343,12 +358,12 @@ " \n", " \n", " 3\n", - " 0.026571\n", - " 0.001018\n", - " 0.003208\n", - " 0.000214\n", + " 0.010192\n", + " 0.000159\n", + " 0.002859\n", + " 0.000089\n", " MorganFingerprintTransformer()\n", - " 256\n", + " 256.0\n", " 0.775\n", " {'fp_transformer': MorganFingerprintTransforme...\n", " 0.169585\n", @@ -362,13 +377,13 @@ " \n", " \n", " 4\n", - " 0.027070\n", - " 0.001007\n", - " 0.003137\n", - " 0.000176\n", + " 0.010103\n", + " 0.000126\n", + " 0.002868\n", + " 0.000119\n", " MorganFingerprintTransformer()\n", - " 256\n", - " 1.0\n", + " 256.0\n", + " 1.000\n", " {'fp_transformer': MorganFingerprintTransforme...\n", " 0.204831\n", " 0.546774\n", @@ 
-400,13 +415,13 @@ " \n", " \n", " 60\n", - " 0.113114\n", - " 0.018944\n", - " 0.029888\n", - " 0.008465\n", + " 0.100754\n", + " 0.006501\n", + " 0.025367\n", + " 0.001743\n", " MACCSKeysFingerprintTransformer()\n", " NaN\n", - " 0.1\n", + " 0.100\n", " {'fp_transformer': MACCSKeysFingerprintTransfo...\n", " -1.649022\n", " -1.943461\n", @@ -419,10 +434,10 @@ " \n", " \n", " 61\n", - " 0.102846\n", - " 0.001870\n", - " 0.025450\n", - " 0.001682\n", + " 0.118554\n", + " 0.022440\n", + " 0.036584\n", + " 0.023911\n", " MACCSKeysFingerprintTransformer()\n", " NaN\n", " 0.325\n", @@ -438,13 +453,13 @@ " \n", " \n", " 62\n", - " 0.103216\n", - " 0.001285\n", - " 0.025450\n", - " 0.001661\n", + " 0.097552\n", + " 0.001638\n", + " 0.025571\n", + " 0.001753\n", " MACCSKeysFingerprintTransformer()\n", " NaN\n", - " 0.55\n", + " 0.550\n", " {'fp_transformer': MACCSKeysFingerprintTransfo...\n", " -0.657588\n", " -0.505782\n", @@ -457,10 +472,10 @@ " \n", " \n", " 63\n", - " 0.103230\n", - " 0.002304\n", - " 0.025570\n", - " 0.001561\n", + " 0.098300\n", + " 0.001744\n", + " 0.025552\n", + " 0.001695\n", " MACCSKeysFingerprintTransformer()\n", " NaN\n", " 0.775\n", @@ -476,13 +491,13 @@ " \n", " \n", " 64\n", - " 0.104568\n", - " 0.002589\n", - " 0.025515\n", - " 0.001571\n", + " 0.098103\n", + " 0.001473\n", + " 0.025320\n", + " 0.001673\n", " MACCSKeysFingerprintTransformer()\n", " NaN\n", - " 1.0\n", + " 1.000\n", " {'fp_transformer': MACCSKeysFingerprintTransfo...\n", " -0.339715\n", " -0.266652\n", @@ -500,43 +515,43 @@ ], "text/plain": [ " mean_fit_time std_fit_time mean_score_time std_score_time \\\n", - "0 0.025447 0.002689 0.003600 0.000743 \n", - "1 0.025515 0.001279 0.003084 0.000064 \n", - "2 0.026243 0.000330 0.003101 0.000081 \n", - "3 0.026571 0.001018 0.003208 0.000214 \n", - "4 0.027070 0.001007 0.003137 0.000176 \n", + "0 0.011822 0.001013 0.003596 0.001311 \n", + "1 0.010119 0.000152 0.002832 0.000070 \n", + "2 0.010302 0.000429 0.003310 0.000967 \n", + "3 
0.010192 0.000159 0.002859 0.000089 \n", + "4 0.010103 0.000126 0.002868 0.000119 \n", ".. ... ... ... ... \n", - "60 0.113114 0.018944 0.029888 0.008465 \n", - "61 0.102846 0.001870 0.025450 0.001682 \n", - "62 0.103216 0.001285 0.025450 0.001661 \n", - "63 0.103230 0.002304 0.025570 0.001561 \n", - "64 0.104568 0.002589 0.025515 0.001571 \n", + "60 0.100754 0.006501 0.025367 0.001743 \n", + "61 0.118554 0.022440 0.036584 0.023911 \n", + "62 0.097552 0.001638 0.025571 0.001753 \n", + "63 0.098300 0.001744 0.025552 0.001695 \n", + "64 0.098103 0.001473 0.025320 0.001673 \n", "\n", - " param_fp_transformer param_fp_transformer__nBits \\\n", - "0 MorganFingerprintTransformer() 256 \n", - "1 MorganFingerprintTransformer() 256 \n", - "2 MorganFingerprintTransformer() 256 \n", - "3 MorganFingerprintTransformer() 256 \n", - "4 MorganFingerprintTransformer() 256 \n", - ".. ... ... \n", - "60 MACCSKeysFingerprintTransformer() NaN \n", - "61 MACCSKeysFingerprintTransformer() NaN \n", - "62 MACCSKeysFingerprintTransformer() NaN \n", - "63 MACCSKeysFingerprintTransformer() NaN \n", - "64 MACCSKeysFingerprintTransformer() NaN \n", + " param_fp_transformer param_fp_transformer__fpSize \\\n", + "0 MorganFingerprintTransformer() 256.0 \n", + "1 MorganFingerprintTransformer() 256.0 \n", + "2 MorganFingerprintTransformer() 256.0 \n", + "3 MorganFingerprintTransformer() 256.0 \n", + "4 MorganFingerprintTransformer() 256.0 \n", + ".. ... ... \n", + "60 MACCSKeysFingerprintTransformer() NaN \n", + "61 MACCSKeysFingerprintTransformer() NaN \n", + "62 MACCSKeysFingerprintTransformer() NaN \n", + "63 MACCSKeysFingerprintTransformer() NaN \n", + "64 MACCSKeysFingerprintTransformer() NaN \n", "\n", - " param_regressor__alpha params \\\n", - "0 0.1 {'fp_transformer': MorganFingerprintTransforme... \n", - "1 0.325 {'fp_transformer': MorganFingerprintTransforme... \n", - "2 0.55 {'fp_transformer': MorganFingerprintTransforme... \n", - "3 0.775 {'fp_transformer': MorganFingerprintTransforme... 
\n", - "4 1.0 {'fp_transformer': MorganFingerprintTransforme... \n", - ".. ... ... \n", - "60 0.1 {'fp_transformer': MACCSKeysFingerprintTransfo... \n", - "61 0.325 {'fp_transformer': MACCSKeysFingerprintTransfo... \n", - "62 0.55 {'fp_transformer': MACCSKeysFingerprintTransfo... \n", - "63 0.775 {'fp_transformer': MACCSKeysFingerprintTransfo... \n", - "64 1.0 {'fp_transformer': MACCSKeysFingerprintTransfo... \n", + " param_regressor__alpha params \\\n", + "0 0.100 {'fp_transformer': MorganFingerprintTransforme... \n", + "1 0.325 {'fp_transformer': MorganFingerprintTransforme... \n", + "2 0.550 {'fp_transformer': MorganFingerprintTransforme... \n", + "3 0.775 {'fp_transformer': MorganFingerprintTransforme... \n", + "4 1.000 {'fp_transformer': MorganFingerprintTransforme... \n", + ".. ... ... \n", + "60 0.100 {'fp_transformer': MACCSKeysFingerprintTransfo... \n", + "61 0.325 {'fp_transformer': MACCSKeysFingerprintTransfo... \n", + "62 0.550 {'fp_transformer': MACCSKeysFingerprintTransfo... \n", + "63 0.775 {'fp_transformer': MACCSKeysFingerprintTransfo... \n", + "64 1.000 {'fp_transformer': MACCSKeysFingerprintTransfo... 
\n", "\n", " split0_test_score split1_test_score split2_test_score \\\n", "0 0.017975 0.394682 0.524598 \n", @@ -593,12 +608,13 @@ { "cell_type": "code", "execution_count": 6, + "id": "a6041579", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:08.447990Z", - "iopub.status.busy": "2024-04-12T12:11:08.446959Z", - "iopub.status.idle": "2024-04-12T12:11:08.691729Z", - "shell.execute_reply": "2024-04-12T12:11:08.691123Z" + "iopub.execute_input": "2024-11-24T09:28:28.324000Z", + "iopub.status.busy": "2024-11-24T09:28:28.323259Z", + "iopub.status.idle": "2024-11-24T09:28:28.574471Z", + "shell.execute_reply": "2024-11-24T09:28:28.573892Z" }, "lines_to_next_cell": 2 }, @@ -618,17 +634,20 @@ "# Best Fingerprint Method / Performance\n", "res_dict = {}\n", "for i, row in df_training_stats.iterrows():\n", - " fp_name = row['param_fp_transformer'] \n", - " if(fp_name in res_dict and row['mean_test_score'] > res_dict[fp_name][\"mean_test_score\"]):\n", + " fp_name = row[\"param_fp_transformer\"]\n", + " if (\n", + " fp_name in res_dict\n", + " and row[\"mean_test_score\"] > res_dict[fp_name][\"mean_test_score\"]\n", + " ):\n", " res_dict[fp_name] = row.to_dict()\n", - " elif(not fp_name in res_dict):\n", + " elif not fp_name in res_dict:\n", " res_dict[fp_name] = row.to_dict()\n", - " \n", + "\n", "df = pd.DataFrame(list(res_dict.values()))\n", - "df =df.sort_values(by=\"mean_test_score\")\n", + "df = df.sort_values(by=\"mean_test_score\")\n", "\n", - "#plot test score vs. approach\n", - "plt.figure(figsize=[14,5])\n", + "# plot test score vs. 
approach\n", + "plt.figure(figsize=[14, 5])\n", "plt.bar(range(len(df)), df.mean_test_score, yerr=df.std_test_score)\n", "plt.xticks(range(len(df)), df.param_fp_transformer, rotation=90, fontsize=14)\n", "plt.ylabel(\"mean score\", fontsize=14)\n", @@ -639,19 +658,20 @@ { "cell_type": "code", "execution_count": 7, + "id": "3ee14366", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:08.694324Z", - "iopub.status.busy": "2024-04-12T12:11:08.694077Z", - "iopub.status.idle": "2024-04-12T12:11:09.027663Z", - "shell.execute_reply": "2024-04-12T12:11:09.026932Z" + "iopub.execute_input": "2024-11-24T09:28:28.576983Z", + "iopub.status.busy": "2024-11-24T09:28:28.576761Z", + "iopub.status.idle": "2024-11-24T09:28:28.913143Z", + "shell.execute_reply": "2024-11-24T09:28:28.912512Z" }, "lines_to_next_cell": 2 }, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -663,39 +683,49 @@ "source": [ "# Best Fingerprint Method / Performance\n", "from collections import defaultdict\n", + "\n", "res_dict = defaultdict(list)\n", "for i, row in df_training_stats.iterrows():\n", - " fp_name = row['param_fp_transformer'] \n", - " if(\"Morgan\" in str(fp_name)):\n", + " fp_name = row[\"param_fp_transformer\"]\n", + " if \"Morgan\" in str(fp_name):\n", " res_dict[fp_name].append(row)\n", "\n", "for fp_type, rows in res_dict.items():\n", " df = pd.DataFrame(rows)\n", - " df =df.sort_values(by=\"mean_test_score\")\n", - "\n", - " #plot test score vs. approach\n", - " xlabels = map(lambda x: \"_\".join(x), zip(df.param_fp_transformer__nBits.astype(str), df.param_regressor__alpha.astype(str)))\n", - "\n", - " \n", - " plt.figure(figsize=[14,5])\n", + " df = df.sort_values(by=\"mean_test_score\")\n", + "\n", + " # plot test score vs. approach\n", + " xlabels = map(\n", + " lambda x: \"_\".join(x),\n", + " zip(\n", + " df.param_fp_transformer__fpSize.astype(str),\n", + " df.param_regressor__alpha.astype(str),\n", + " ),\n", + " )\n", + "\n", + " plt.figure(figsize=[14, 5])\n", " plt.bar(range(len(df)), df.mean_test_score, yerr=df.std_test_score)\n", " plt.xticks(range(len(df)), xlabels, rotation=90, fontsize=14)\n", " plt.ylabel(\"mean score\", fontsize=14)\n", " plt.xlabel(\"bitsize_alpha\", fontsize=14)\n", "\n", - " plt.title(\"Fingerprint Transformer \"+str(fp_type).split(\"(\")[0]+\" per Bitsize\", fontsize=18)\n", + " plt.title(\n", + " \"Fingerprint Transformer \" + str(fp_type).split(\"(\")[0] + \" per Bitsize\",\n", + " fontsize=18,\n", + " )\n", " pass" ] }, { "cell_type": "code", "execution_count": 8, + "id": "20f7e139", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:09.030301Z", - "iopub.status.busy": "2024-04-12T12:11:09.030024Z", - "iopub.status.idle": "2024-04-12T12:11:09.425867Z", - "shell.execute_reply": "2024-04-12T12:11:09.425174Z" + "iopub.execute_input": "2024-11-24T09:28:28.915675Z", + 
"iopub.status.busy": "2024-11-24T09:28:28.915430Z", + "iopub.status.idle": "2024-11-24T09:28:29.314170Z", + "shell.execute_reply": "2024-11-24T09:28:29.313570Z" } }, "outputs": [ @@ -711,10 +741,10 @@ } ], "source": [ - "#plot ALL test score vs. approach\n", - "df =df_training_stats.sort_values(by=\"mean_test_score\")\n", + "# plot ALL test score vs. approach\n", + "df = df_training_stats.sort_values(by=\"mean_test_score\")\n", "\n", - "plt.figure(figsize=[16,9])\n", + "plt.figure(figsize=[16, 9])\n", "plt.bar(range(len(df)), df.mean_test_score, yerr=df.std_test_score)\n", "plt.ylabel(\"mean score\", fontsize=14)\n", "plt.xticks(range(len(df))[::5], df.param_fp_transformer[::5], rotation=90, fontsize=14)\n", @@ -724,6 +754,7 @@ }, { "cell_type": "markdown", + "id": "f407671c", "metadata": {}, "source": [ "This file have the following licence:\n", @@ -955,5 +986,5 @@ } }, "nbformat": 4, - "nbformat_minor": 1 + "nbformat_minor": 5 } diff --git a/notebooks/09_Combinatorial_Method_Usage_with_FingerPrint_Transformers.py b/notebooks/09_Combinatorial_Method_Usage_with_FingerPrint_Transformers.py index d8c51bb..4a39a02 100644 --- a/notebooks/09_Combinatorial_Method_Usage_with_FingerPrint_Transformers.py +++ b/notebooks/09_Combinatorial_Method_Usage_with_FingerPrint_Transformers.py @@ -24,7 +24,7 @@ # * Training Phase # * Analysis # -# Authors: @VincentAlexanderScholz, @RiesBen +# Authors: @VincentAlexanderScholz, @RiesBen # # ## Imports: # First we will import all the stuff that we will need for our work. 
@@ -62,12 +62,13 @@ csv_file = "SLC6A4_active_excape_export.csv" if not os.path.exists(csv_file): import urllib.request + url = "https://ndownloader.figshare.com/files/25747817" urllib.request.urlretrieve(url, csv_file) else: - csv_file = '../tests/data/SLC6A4_active_excapedb_subset.csv' + csv_file = "../tests/data/SLC6A4_active_excapedb_subset.csv" -#Parse Database +# Parse Database data = pd.read_csv(csv_file) PandasTools.AddMoleculeColumnToFrame(data, smilesCol="SMILES") @@ -77,31 +78,42 @@ # ## Build Pipeline: # In this stage we will build the Pipeline consisting of the featurization part (finger print transformers) and the model part (Ridge Regression). # -# Note that the featurization in this section is an hyperparameter, living in `param_grid`, and the `"fp_transformer"` string is just a placeholder, being replaced during pipeline execution. +# Note that the featurization in this section is an hyperparameter, living in `param_grid`, and the `"fp_transformer"` string is just a placeholder, being replaced during pipeline execution. # # This way we can define multiple different scenarios in `param_grid`, that allow us to rapidly explore different combinations of settings and methodologies. 
# %% regressor = Ridge() -optimization_pipe = Pipeline([("fp_transformer", "fp_transformer"), # this is a placeholder for different transformers - ("regressor", regressor)]) - -param_grid = [ # Here pass different Options and Approaches +optimization_pipe = Pipeline( + [ + ( + "fp_transformer", + "fp_transformer", + ), # this is a placeholder for different transformers + ("regressor", regressor), + ] +) + +param_grid = [ # Here pass different Options and Approaches { - "fp_transformer": [fingerprints.MorganFingerprintTransformer(), - fingerprints.AvalonFingerprintTransformer()], - "fp_transformer__nBits": [2**x for x in range(8,13)], + "fp_transformer": [ + fingerprints.MorganFingerprintTransformer(), + fingerprints.AvalonFingerprintTransformer(), + ], + "fp_transformer__fpSize": [2**x for x in range(8, 13)], }, { - "fp_transformer": [fingerprints.RDKitFingerprintTransformer(), - fingerprints.AtomPairFingerprintTransformer(), - fingerprints.MACCSKeysFingerprintTransformer()], + "fp_transformer": [ + fingerprints.RDKitFingerprintTransformer(), + fingerprints.AtomPairFingerprintTransformer(), + fingerprints.MACCSKeysFingerprintTransformer(), + ], }, ] global_options = { - "regressor__alpha": np.linspace(0.1,1,5), + "regressor__alpha": np.linspace(0.1, 1, 5), } [params.update(global_options) for params in param_grid] @@ -114,18 +126,19 @@ # %% # Split Data -mol_list_train, mol_list_test, y_train, y_test = train_test_split(data.ROMol, data.pXC50, random_state=0) +mol_list_train, mol_list_test, y_train, y_test = train_test_split( + data.ROMol, data.pXC50, random_state=0 +) # Define Search Process -grid = GridSearchCV(optimization_pipe, n_jobs=1, - param_grid=param_grid) +grid = GridSearchCV(optimization_pipe, n_jobs=1, param_grid=param_grid) # Train t0 = time() grid.fit(mol_list_train, y_train.values) t1 = time() -print(f'Runtime: {t1-t0:0.2F}') +print(f"Runtime: {t1-t0:0.2F}") # %% [markdown] # ## Analysis @@ -140,17 +153,20 @@ # Best Fingerprint Method / Performance 
res_dict = {} for i, row in df_training_stats.iterrows(): - fp_name = row['param_fp_transformer'] - if(fp_name in res_dict and row['mean_test_score'] > res_dict[fp_name]["mean_test_score"]): + fp_name = row["param_fp_transformer"] + if ( + fp_name in res_dict + and row["mean_test_score"] > res_dict[fp_name]["mean_test_score"] + ): res_dict[fp_name] = row.to_dict() - elif(not fp_name in res_dict): + elif not fp_name in res_dict: res_dict[fp_name] = row.to_dict() - + df = pd.DataFrame(list(res_dict.values())) -df =df.sort_values(by="mean_test_score") +df = df.sort_values(by="mean_test_score") -#plot test score vs. approach -plt.figure(figsize=[14,5]) +# plot test score vs. approach +plt.figure(figsize=[14, 5]) plt.bar(range(len(df)), df.mean_test_score, yerr=df.std_test_score) plt.xticks(range(len(df)), df.param_fp_transformer, rotation=90, fontsize=14) plt.ylabel("mean score", fontsize=14) @@ -161,35 +177,44 @@ # %% # Best Fingerprint Method / Performance from collections import defaultdict + res_dict = defaultdict(list) for i, row in df_training_stats.iterrows(): - fp_name = row['param_fp_transformer'] - if("Morgan" in str(fp_name)): + fp_name = row["param_fp_transformer"] + if "Morgan" in str(fp_name): res_dict[fp_name].append(row) for fp_type, rows in res_dict.items(): df = pd.DataFrame(rows) - df =df.sort_values(by="mean_test_score") - - #plot test score vs. approach - xlabels = map(lambda x: "_".join(x), zip(df.param_fp_transformer__nBits.astype(str), df.param_regressor__alpha.astype(str))) - - - plt.figure(figsize=[14,5]) + df = df.sort_values(by="mean_test_score") + + # plot test score vs. 
approach + xlabels = map( + lambda x: "_".join(x), + zip( + df.param_fp_transformer__fpSize.astype(str), + df.param_regressor__alpha.astype(str), + ), + ) + + plt.figure(figsize=[14, 5]) plt.bar(range(len(df)), df.mean_test_score, yerr=df.std_test_score) plt.xticks(range(len(df)), xlabels, rotation=90, fontsize=14) plt.ylabel("mean score", fontsize=14) plt.xlabel("bitsize_alpha", fontsize=14) - plt.title("Fingerprint Transformer "+str(fp_type).split("(")[0]+" per Bitsize", fontsize=18) + plt.title( + "Fingerprint Transformer " + str(fp_type).split("(")[0] + " per Bitsize", + fontsize=18, + ) pass # %% -#plot ALL test score vs. approach -df =df_training_stats.sort_values(by="mean_test_score") +# plot ALL test score vs. approach +df = df_training_stats.sort_values(by="mean_test_score") -plt.figure(figsize=[16,9]) +plt.figure(figsize=[16, 9]) plt.bar(range(len(df)), df.mean_test_score, yerr=df.std_test_score) plt.ylabel("mean score", fontsize=14) plt.xticks(range(len(df))[::5], df.param_fp_transformer[::5], rotation=90, fontsize=14) diff --git a/notebooks/10_pipeline_pandas_output.ipynb b/notebooks/10_pipeline_pandas_output.ipynb index 09275ca..7fcff28 100644 --- a/notebooks/10_pipeline_pandas_output.ipynb +++ b/notebooks/10_pipeline_pandas_output.ipynb @@ -2,6 +2,7 @@ "cells": [ { "cell_type": "markdown", + "id": "454d87b5", "metadata": {}, "source": [ "# Preserving feature information in DataFrames\n", @@ -14,12 +15,13 @@ { "cell_type": "code", "execution_count": 1, + "id": "cb457069", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:10.927499Z", - "iopub.status.busy": "2024-04-12T12:11:10.927303Z", - "iopub.status.idle": "2024-04-12T12:11:11.787084Z", - "shell.execute_reply": "2024-04-12T12:11:11.786420Z" + "iopub.execute_input": "2024-11-24T09:28:31.171627Z", + "iopub.status.busy": "2024-11-24T09:28:31.171255Z", + "iopub.status.idle": "2024-11-24T09:28:32.152283Z", + "shell.execute_reply": "2024-11-24T09:28:32.151641Z" } }, "outputs": [], @@ 
-42,12 +44,13 @@ { "cell_type": "code", "execution_count": 2, + "id": "bc72ca09", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:11.789862Z", - "iopub.status.busy": "2024-04-12T12:11:11.789572Z", - "iopub.status.idle": "2024-04-12T12:11:11.796138Z", - "shell.execute_reply": "2024-04-12T12:11:11.795520Z" + "iopub.execute_input": "2024-11-24T09:28:32.155106Z", + "iopub.status.busy": "2024-11-24T09:28:32.154793Z", + "iopub.status.idle": "2024-11-24T09:28:32.161683Z", + "shell.execute_reply": "2024-11-24T09:28:32.161035Z" } }, "outputs": [], @@ -60,6 +63,7 @@ }, { "cell_type": "markdown", + "id": "f482cac3", "metadata": {}, "source": [ "Let's split the dataset in training and test, so we will be able to use the test set to evaluate the performance of models trained on the training set." @@ -68,12 +72,13 @@ { "cell_type": "code", "execution_count": 3, + "id": "6019d09f", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:11.798853Z", - "iopub.status.busy": "2024-04-12T12:11:11.798596Z", - "iopub.status.idle": "2024-04-12T12:11:11.803335Z", - "shell.execute_reply": "2024-04-12T12:11:11.802809Z" + "iopub.execute_input": "2024-11-24T09:28:32.164164Z", + "iopub.status.busy": "2024-11-24T09:28:32.163946Z", + "iopub.status.idle": "2024-11-24T09:28:32.168382Z", + "shell.execute_reply": "2024-11-24T09:28:32.167874Z" } }, "outputs": [], @@ -84,12 +89,13 @@ { "cell_type": "code", "execution_count": 4, + "id": "fe9efa0e", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:11.805768Z", - "iopub.status.busy": "2024-04-12T12:11:11.805514Z", - "iopub.status.idle": "2024-04-12T12:11:11.809489Z", - "shell.execute_reply": "2024-04-12T12:11:11.808992Z" + "iopub.execute_input": "2024-11-24T09:28:32.171037Z", + "iopub.status.busy": "2024-11-24T09:28:32.170722Z", + "iopub.status.idle": "2024-11-24T09:28:32.174941Z", + "shell.execute_reply": "2024-11-24T09:28:32.174393Z" } }, "outputs": [], @@ -105,6 +111,7 @@ }, { 
"cell_type": "markdown", + "id": "7b4cca39", "metadata": {}, "source": [ "## Descriptors pipeline that returns DataFrames\n", @@ -122,26 +129,432 @@ { "cell_type": "code", "execution_count": 5, + "id": "33ce774b", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:11.811852Z", - "iopub.status.busy": "2024-04-12T12:11:11.811625Z", - "iopub.status.idle": "2024-04-12T12:11:11.827762Z", - "shell.execute_reply": "2024-04-12T12:11:11.827206Z" + "iopub.execute_input": "2024-11-24T09:28:32.177459Z", + "iopub.status.busy": "2024-11-24T09:28:32.177241Z", + "iopub.status.idle": "2024-11-24T09:28:32.194656Z", + "shell.execute_reply": "2024-11-24T09:28:32.194079Z" } }, "outputs": [ { "data": { "text/html": [ - "
Pipeline(steps=[('smilestomoltransformer', SmilesToMolTransformer()),\n",
+       "
Pipeline(steps=[('smilestomoltransformer', SmilesToMolTransformer()),\n",
        "                ('standardizer', Standardizer()),\n",
        "                ('moleculardescriptortransformer',\n",
        "                 MolecularDescriptorTransformer(desc_list=['MaxAbsEStateIndex',\n",
        "                                                           'MaxEStateIndex',\n",
        "                                                           'MinAbsEStateIndex',\n",
        "                                                           'MinEStateIndex',\n",
-       "                                                           'qed', 'MolWt',\n",
+       "                                                           'qed', 'SPS',\n",
+       "                                                           'MolWt',\n",
        "                                                           'HeavyAtomMolWt',\n",
        "                                                           'ExactMolWt',\n",
        "                                                           'NumValenceElectrons',\n",
@@ -162,15 +575,15 @@
        "                                                           'BCUT2D_MRHI',\n",
        "                                                           'BCUT2D_MRLOW',\n",
        "                                                           'AvgIpc', 'BalabanJ',\n",
-       "                                                           'BertzCT', 'Chi0',\n",
-       "                                                           'Chi0n', ...]))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "Pipeline(steps=[('smilestomoltransformer', SmilesToMolTransformer()),\n", @@ -218,7 +631,8 @@ " 'MaxEStateIndex',\n", " 'MinAbsEStateIndex',\n", " 'MinEStateIndex',\n", - " 'qed', 'MolWt',\n", + " 'qed', 'SPS',\n", + " 'MolWt',\n", " 'HeavyAtomMolWt',\n", " 'ExactMolWt',\n", " 'NumValenceElectrons',\n", @@ -239,8 +653,7 @@ " 'BCUT2D_MRHI',\n", " 'BCUT2D_MRLOW',\n", " 'AvgIpc', 'BalabanJ',\n", - " 'BertzCT', 'Chi0',\n", - " 'Chi0n', ...]))])" + " 'BertzCT', 'Chi0', ...]))])" ] }, "execution_count": 5, @@ -260,15 +673,541 @@ { "cell_type": "code", "execution_count": 6, + "id": "2cb55603", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:11.830134Z", - "iopub.status.busy": "2024-04-12T12:11:11.829918Z", - "iopub.status.idle": "2024-04-12T12:11:13.735020Z", - "shell.execute_reply": "2024-04-12T12:11:13.734450Z" + "iopub.execute_input": "2024-11-24T09:28:32.196995Z", + "iopub.status.busy": "2024-11-24T09:28:32.196792Z", + "iopub.status.idle": "2024-11-24T09:28:34.209289Z", + "shell.execute_reply": "2024-11-24T09:28:34.208617Z" } }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please 
use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", 
+ "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:32] 
DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION 
WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: 
please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", 
+ "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] 
DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION 
WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: 
please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:33] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", 
+ "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:34] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, { "data": { "text/html": [ @@ -295,11 +1234,11 @@ " MinAbsEStateIndex\n", " MinEStateIndex\n", " qed\n", + " SPS\n", " MolWt\n", " HeavyAtomMolWt\n", " ExactMolWt\n", " NumValenceElectrons\n", - " NumRadicalElectrons\n", " ...\n", " fr_sulfide\n", " fr_sulfonamd\n", @@ -321,11 +1260,11 @@ " 0.056985\n", " -0.432587\n", " 0.353101\n", - " 522.592\n", - " 490.336\n", - " 522.233014\n", + " 14.289474\n", + " 522.591980\n", + " 490.335999\n", + " 522.233032\n", " 200.0\n", - " 0.0\n", " ...\n", " 0.0\n", " 0.0\n", @@ -345,11 +1284,11 @@ " 0.026212\n", " -0.050849\n", " 0.682187\n", - " 425.558\n", - " 398.342\n", - " 425.188546\n", + " 16.033333\n", + " 425.558014\n", + " 398.342010\n", + " 425.188538\n", " 158.0\n", - " 0.0\n", " 
...\n", " 0.0\n", " 0.0\n", @@ -369,11 +1308,11 @@ " 0.266700\n", " -0.413763\n", " 0.443905\n", - " 465.588\n", - " 432.324\n", - " 465.259169\n", + " 15.852942\n", + " 465.588013\n", + " 432.324005\n", + " 465.259155\n", " 180.0\n", - " 0.0\n", " ...\n", " 0.0\n", " 0.0\n", @@ -388,16 +1327,16 @@ " \n", " \n", " 3\n", - " 12.725824\n", - " 12.725824\n", + " 12.725823\n", + " 12.725823\n", " 0.052996\n", " -0.052996\n", " 0.577709\n", - " 478.468\n", - " 445.204\n", - " 477.206216\n", + " 17.812500\n", + " 478.467987\n", + " 445.204010\n", + " 477.206207\n", " 174.0\n", - " 0.0\n", " ...\n", " 0.0\n", " 0.0\n", @@ -407,7 +1346,7 @@ " 0.0\n", " 0.0\n", " 0.0\n", - " 1.0\n", + " 0.0\n", " 0.0\n", " \n", " \n", @@ -417,11 +1356,11 @@ " 0.898244\n", " 0.898244\n", " 0.658108\n", - " 246.313\n", - " 232.201\n", - " 246.115698\n", + " 13.052631\n", + " 246.313004\n", + " 232.201004\n", + " 246.115692\n", " 92.0\n", - " 0.0\n", " ...\n", " 0.0\n", " 0.0\n", @@ -465,11 +1404,11 @@ " 0.175664\n", " 0.175664\n", " 0.916154\n", - " 312.240\n", - " 293.088\n", - " 311.084370\n", + " 35.700001\n", + " 312.239990\n", + " 293.088013\n", + " 311.084381\n", " 108.0\n", - " 0.0\n", " ...\n", " 0.0\n", " 0.0\n", @@ -489,11 +1428,11 @@ " 0.420312\n", " 0.420312\n", " 0.378112\n", - " 465.645\n", - " 430.365\n", + " 21.714285\n", + " 465.644989\n", + " 430.364990\n", " 465.289246\n", " 180.0\n", - " 0.0\n", " ...\n", " 0.0\n", " 0.0\n", @@ -513,11 +1452,11 @@ " 0.300870\n", " -4.299737\n", " 0.919340\n", - " 328.378\n", - " 305.194\n", - " 328.176248\n", + " 23.565218\n", + " 328.377991\n", + " 305.194000\n", + " 328.176239\n", " 128.0\n", - " 0.0\n", " ...\n", " 0.0\n", " 0.0\n", @@ -537,11 +1476,11 @@ " 0.127623\n", " -0.127623\n", " 0.918995\n", - " 323.223\n", - " 307.095\n", - " 322.063968\n", + " 19.428572\n", + " 323.222992\n", + " 307.095001\n", + " 322.063965\n", " 110.0\n", - " 0.0\n", " ...\n", " 0.0\n", " 0.0\n", @@ -561,11 +1500,11 @@ " 0.086367\n", " 0.086367\n", " 
0.911854\n", - " 296.414\n", - " 272.222\n", - " 296.188863\n", + " 17.136364\n", + " 296.414001\n", + " 272.221985\n", + " 296.188873\n", " 116.0\n", - " 0.0\n", " ...\n", " 0.0\n", " 0.0\n", @@ -580,7 +1519,7 @@ " \n", " \n", "\n", - "

159 rows × 209 columns

\n", + "

159 rows × 210 columns

\n", "" ], "text/plain": [ @@ -588,7 +1527,7 @@ "0 13.448610 13.448610 0.056985 -0.432587 \n", "1 12.863074 12.863074 0.026212 -0.050849 \n", "2 13.424788 13.424788 0.266700 -0.413763 \n", - "3 12.725824 12.725824 0.052996 -0.052996 \n", + "3 12.725823 12.725823 0.052996 -0.052996 \n", "4 6.356910 6.356910 0.898244 0.898244 \n", ".. ... ... ... ... \n", "154 6.217065 6.217065 0.175664 0.175664 \n", @@ -597,31 +1536,31 @@ "157 6.238476 6.238476 0.127623 -0.127623 \n", "158 6.371723 6.371723 0.086367 0.086367 \n", "\n", - " qed MolWt HeavyAtomMolWt ExactMolWt NumValenceElectrons \\\n", - "0 0.353101 522.592 490.336 522.233014 200.0 \n", - "1 0.682187 425.558 398.342 425.188546 158.0 \n", - "2 0.443905 465.588 432.324 465.259169 180.0 \n", - "3 0.577709 478.468 445.204 477.206216 174.0 \n", - "4 0.658108 246.313 232.201 246.115698 92.0 \n", - ".. ... ... ... ... ... \n", - "154 0.916154 312.240 293.088 311.084370 108.0 \n", - "155 0.378112 465.645 430.365 465.289246 180.0 \n", - "156 0.919340 328.378 305.194 328.176248 128.0 \n", - "157 0.918995 323.223 307.095 322.063968 110.0 \n", - "158 0.911854 296.414 272.222 296.188863 116.0 \n", - "\n", - " NumRadicalElectrons ... fr_sulfide fr_sulfonamd fr_sulfone \\\n", - "0 0.0 ... 0.0 0.0 0.0 \n", - "1 0.0 ... 0.0 0.0 0.0 \n", - "2 0.0 ... 0.0 0.0 0.0 \n", - "3 0.0 ... 0.0 0.0 0.0 \n", - "4 0.0 ... 0.0 0.0 0.0 \n", + " qed SPS MolWt HeavyAtomMolWt ExactMolWt \\\n", + "0 0.353101 14.289474 522.591980 490.335999 522.233032 \n", + "1 0.682187 16.033333 425.558014 398.342010 425.188538 \n", + "2 0.443905 15.852942 465.588013 432.324005 465.259155 \n", + "3 0.577709 17.812500 478.467987 445.204010 477.206207 \n", + "4 0.658108 13.052631 246.313004 232.201004 246.115692 \n", + ".. ... ... ... ... ... 
\n", + "154 0.916154 35.700001 312.239990 293.088013 311.084381 \n", + "155 0.378112 21.714285 465.644989 430.364990 465.289246 \n", + "156 0.919340 23.565218 328.377991 305.194000 328.176239 \n", + "157 0.918995 19.428572 323.222992 307.095001 322.063965 \n", + "158 0.911854 17.136364 296.414001 272.221985 296.188873 \n", + "\n", + " NumValenceElectrons ... fr_sulfide fr_sulfonamd fr_sulfone \\\n", + "0 200.0 ... 0.0 0.0 0.0 \n", + "1 158.0 ... 0.0 0.0 0.0 \n", + "2 180.0 ... 0.0 0.0 0.0 \n", + "3 174.0 ... 0.0 0.0 0.0 \n", + "4 92.0 ... 0.0 0.0 0.0 \n", ".. ... ... ... ... ... \n", - "154 0.0 ... 0.0 0.0 0.0 \n", - "155 0.0 ... 0.0 0.0 0.0 \n", - "156 0.0 ... 0.0 0.0 0.0 \n", - "157 0.0 ... 0.0 0.0 0.0 \n", - "158 0.0 ... 0.0 0.0 0.0 \n", + "154 108.0 ... 0.0 0.0 0.0 \n", + "155 180.0 ... 0.0 0.0 0.0 \n", + "156 128.0 ... 0.0 0.0 0.0 \n", + "157 110.0 ... 0.0 0.0 0.0 \n", + "158 116.0 ... 0.0 0.0 0.0 \n", "\n", " fr_term_acetylene fr_tetrazole fr_thiazole fr_thiocyan fr_thiophene \\\n", "0 0.0 0.0 0.0 0.0 0.0 \n", @@ -640,7 +1579,7 @@ "0 0.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", - "3 1.0 0.0 \n", + "3 0.0 0.0 \n", "4 0.0 0.0 \n", ".. ... ... 
\n", "154 0.0 0.0 \n", @@ -649,7 +1588,7 @@ "157 0.0 0.0 \n", "158 0.0 0.0 \n", "\n", - "[159 rows x 209 columns]" + "[159 rows x 210 columns]" ] }, "execution_count": 6, @@ -664,6 +1603,7 @@ }, { "cell_type": "markdown", + "id": "40d6024a", "metadata": {}, "source": [ "All scikit-mol transformers are now compatible with the scikit-learn [set_output API](https://scikit-learn.org/stable/auto_examples/miscellaneous/plot_set_output.html).\n", @@ -675,25 +1615,430 @@ { "cell_type": "code", "execution_count": 7, + "id": "f56c539c", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:13.737598Z", - "iopub.status.busy": "2024-04-12T12:11:13.737383Z", - "iopub.status.idle": "2024-04-12T12:11:13.745000Z", - "shell.execute_reply": "2024-04-12T12:11:13.744500Z" + "iopub.execute_input": "2024-11-24T09:28:34.211959Z", + "iopub.status.busy": "2024-11-24T09:28:34.211717Z", + "iopub.status.idle": "2024-11-24T09:28:34.220439Z", + "shell.execute_reply": "2024-11-24T09:28:34.219746Z" } }, "outputs": [ { "data": { "text/html": [ - "
Pipeline(steps=[('smilestomoltransformer', SmilesToMolTransformer()),\n",
+       "
Pipeline(steps=[('smilestomoltransformer', SmilesToMolTransformer()),\n",
        "                ('standardizer', Standardizer()),\n",
        "                ('morganfingerprinttransformer',\n",
-       "                 MorganFingerprintTransformer())])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
SmilesToMolTransformer()
Standardizer()
MorganFingerprintTransformer()
" ], "text/plain": [ "Pipeline(steps=[('smilestomoltransformer', SmilesToMolTransformer()),\n", @@ -719,12 +2064,13 @@ { "cell_type": "code", "execution_count": 8, + "id": "781d1bc8", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:13.747288Z", - "iopub.status.busy": "2024-04-12T12:11:13.747073Z", - "iopub.status.idle": "2024-04-12T12:11:14.181998Z", - "shell.execute_reply": "2024-04-12T12:11:14.181388Z" + "iopub.execute_input": "2024-11-24T09:28:34.222936Z", + "iopub.status.busy": "2024-11-24T09:28:34.222716Z", + "iopub.status.idle": "2024-11-24T09:28:34.618391Z", + "shell.execute_reply": "2024-11-24T09:28:34.617722Z" } }, "outputs": [ @@ -1123,6 +2469,7 @@ }, { "cell_type": "markdown", + "id": "19a13ca2", "metadata": {}, "source": [ "## Analyze feature importance of regression pipeline\n", @@ -1135,31 +2482,438 @@ { "cell_type": "code", "execution_count": 9, + "id": "4872ecab", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:14.184553Z", - "iopub.status.busy": "2024-04-12T12:11:14.184344Z", - "iopub.status.idle": "2024-04-12T12:11:14.201432Z", - "shell.execute_reply": "2024-04-12T12:11:14.200853Z" + "iopub.execute_input": "2024-11-24T09:28:34.621103Z", + "iopub.status.busy": "2024-11-24T09:28:34.620854Z", + "iopub.status.idle": "2024-11-24T09:28:34.640701Z", + "shell.execute_reply": "2024-11-24T09:28:34.640027Z" } }, "outputs": [ { "data": { "text/html": [ - "
Pipeline(steps=[('smilestomoltransformer', SmilesToMolTransformer()),\n",
+       "
Pipeline(steps=[('smilestomoltransformer', SmilesToMolTransformer()),\n",
        "                ('standardizer', Standardizer()),\n",
        "                ('moleculardescriptortransformer',\n",
        "                 MolecularDescriptorTransformer(desc_list=['MaxAbsEStateIndex',\n",
        "                                                           'MaxEStateIndex',\n",
        "                                                           'MinAbsEStateIndex',\n",
        "                                                           'MinEStateIndex',\n",
-       "                                                           'qed', 'MolWt',\n",
+       "                                                           'qed', 'SPS',\n",
+       "                                                           'MolWt',\n",
        "                                                           'HeavyAtomMolWt',\n",
        "                                                           'ExactMolWt',\n",
        "                                                           'NumValenceElectrons',\n",
        "                                                           'NumRadicalElectrons',\n",
-       "                                                           'MaxPartialC...\n",
+       "                                                           'MaxPa...\n",
+       "                                                           'MaxAbsPartialCharge',\n",
        "                                                           'MinAbsPartialCharge',\n",
        "                                                           'FpDensityMorgan1',\n",
        "                                                           'FpDensityMorgan2',\n",
@@ -1173,22 +2927,23 @@
        "                                                           'BCUT2D_MRHI',\n",
        "                                                           'BCUT2D_MRLOW',\n",
        "                                                           'AvgIpc', 'BalabanJ',\n",
-       "                                                           'BertzCT', 'Chi0',\n",
-       "                                                           'Chi0n', ...])),\n",
+       "                                                           'BertzCT', 'Chi0', ...])),\n",
        "                ('standardscaler', StandardScaler()),\n",
-       "                ('randomforestregressor', RandomForestRegressor(max_depth=5))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
StandardScaler()
RandomForestRegressor(max_depth=5)
" ], "text/plain": [ "Pipeline(steps=[('smilestomoltransformer', SmilesToMolTransformer()),\n", @@ -1231,12 +2986,14 @@ " 'MaxEStateIndex',\n", " 'MinAbsEStateIndex',\n", " 'MinEStateIndex',\n", - " 'qed', 'MolWt',\n", + " 'qed', 'SPS',\n", + " 'MolWt',\n", " 'HeavyAtomMolWt',\n", " 'ExactMolWt',\n", " 'NumValenceElectrons',\n", " 'NumRadicalElectrons',\n", - " 'MaxPartialC...\n", + " 'MaxPa...\n", + " 'MaxAbsPartialCharge',\n", " 'MinAbsPartialCharge',\n", " 'FpDensityMorgan1',\n", " 'FpDensityMorgan2',\n", @@ -1250,8 +3007,7 @@ " 'BCUT2D_MRHI',\n", " 'BCUT2D_MRLOW',\n", " 'AvgIpc', 'BalabanJ',\n", - " 'BertzCT', 'Chi0',\n", - " 'Chi0n', ...])),\n", + " 'BertzCT', 'Chi0', ...])),\n", " ('standardscaler', StandardScaler()),\n", " ('randomforestregressor', RandomForestRegressor(max_depth=5))])" ] @@ -1279,15 +3035,680 @@ { "cell_type": "code", "execution_count": 10, + "id": "f0b2f44f", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:14.203799Z", - "iopub.status.busy": "2024-04-12T12:11:14.203546Z", - "iopub.status.idle": "2024-04-12T12:11:17.092871Z", - "shell.execute_reply": "2024-04-12T12:11:17.092220Z" + "iopub.execute_input": "2024-11-24T09:28:34.643396Z", + "iopub.status.busy": "2024-11-24T09:28:34.643148Z", + "iopub.status.idle": "2024-11-24T09:28:37.656343Z", + "shell.execute_reply": "2024-11-24T09:28:37.655703Z" } }, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + 
"[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] 
DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION 
WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", 
+ "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:35] 
DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION 
WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: 
please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:35] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", 
+ "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] 
DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION 
WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:36] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", 
+ "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] 
DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION 
WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:37] DEPRECATION WARNING: please use MorganGenerator\n" + ] + } + ], "source": [ "regression_pipeline.fit(smis_train, target_train)\n", "pred_test = regression_pipeline.predict(smis_test)" @@ -1295,6 +3716,7 @@ }, { "cell_type": "markdown", + "id": "3aa6802d", "metadata": {}, "source": [ "Let's define a simple function to compute regression metrics, and use 
it to evaluate the test set performance of the pipeline." @@ -1303,21 +3725,30 @@ { "cell_type": "code", "execution_count": 11, + "id": "8b59851a", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:17.095680Z", - "iopub.status.busy": "2024-04-12T12:11:17.095437Z", - "iopub.status.idle": "2024-04-12T12:11:17.102165Z", - "shell.execute_reply": "2024-04-12T12:11:17.101654Z" + "iopub.execute_input": "2024-11-24T09:28:37.658965Z", + "iopub.status.busy": "2024-11-24T09:28:37.658741Z", + "iopub.status.idle": "2024-11-24T09:28:37.666594Z", + "shell.execute_reply": "2024-11-24T09:28:37.665979Z" } }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/esben/python_envs/vscode/lib/python3.10/site-packages/sklearn/metrics/_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n", + " warnings.warn(\n" + ] + }, { "data": { "text/plain": [ - "{'RMSE': 0.8750229695931232,\n", - " 'MAE': 0.7227101414064178,\n", - " 'R2': 0.11946989572680278}" + "{'RMSE': 0.8736959928049254,\n", + " 'MAE': 0.707222432887994,\n", + " 'R2': 0.12213852746646214}" ] }, "execution_count": 11, @@ -1341,19 +3772,424 @@ { "cell_type": "code", "execution_count": 12, + "id": "68528957", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:17.104450Z", - "iopub.status.busy": "2024-04-12T12:11:17.104246Z", - "iopub.status.idle": "2024-04-12T12:11:17.108860Z", - "shell.execute_reply": "2024-04-12T12:11:17.108329Z" + "iopub.execute_input": "2024-11-24T09:28:37.668899Z", + "iopub.status.busy": "2024-11-24T09:28:37.668697Z", + "iopub.status.idle": "2024-11-24T09:28:37.673651Z", + "shell.execute_reply": "2024-11-24T09:28:37.672957Z" } }, "outputs": [ { "data": { "text/html": [ - "
RandomForestRegressor(max_depth=5)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
RandomForestRegressor(max_depth=5)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "RandomForestRegressor(max_depth=5)" @@ -1371,6 +4207,7 @@ }, { "cell_type": "markdown", + "id": "d8f32688", "metadata": {}, "source": [ "Since we used `set_output(transform=\"pandas\")` on the pipeline, the last step of the pipeline (the regression model) has the descriptor names in the `feature_names_in_` attribute. We can use them and the `feature_importances_` attribute to easily analyze the feature importances." @@ -1379,12 +4216,13 @@ { "cell_type": "code", "execution_count": 13, + "id": "24011e90", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:17.111179Z", - "iopub.status.busy": "2024-04-12T12:11:17.110981Z", - "iopub.status.idle": "2024-04-12T12:11:17.124017Z", - "shell.execute_reply": "2024-04-12T12:11:17.123440Z" + "iopub.execute_input": "2024-11-24T09:28:37.677173Z", + "iopub.status.busy": "2024-11-24T09:28:37.676324Z", + "iopub.status.idle": "2024-11-24T09:28:37.690728Z", + "shell.execute_reply": "2024-11-24T09:28:37.690184Z" } }, "outputs": [ @@ -1417,27 +4255,27 @@ " \n", " 0\n", " MaxAbsEStateIndex\n", - " 0.003899\n", + " 0.002776\n", " \n", " \n", " 1\n", " MaxEStateIndex\n", - " 0.001640\n", + " 0.003859\n", " \n", " \n", " 2\n", " MinAbsEStateIndex\n", - " 0.002302\n", + " 0.006311\n", " \n", " \n", " 3\n", " MinEStateIndex\n", - " 0.002898\n", + " 0.004721\n", " \n", " \n", " 4\n", " qed\n", - " 0.008949\n", + " 0.007605\n", " \n", " \n", " ...\n", @@ -1445,50 +4283,50 @@ " ...\n", " \n", " \n", - " 204\n", + " 205\n", " fr_thiazole\n", " 0.000000\n", " \n", " \n", - " 205\n", + " 206\n", " fr_thiocyan\n", " 0.000000\n", " \n", " \n", - " 206\n", + " 207\n", " fr_thiophene\n", - " 0.000286\n", + " 0.000046\n", " \n", " \n", - " 207\n", + " 208\n", " fr_unbrch_alkane\n", - " 0.000020\n", + " 0.000000\n", " \n", " \n", - " 208\n", + " 209\n", " fr_urea\n", - " 0.000015\n", + " 0.000000\n", " \n", " \n", "\n", - "

209 rows × 2 columns

\n", + "

210 rows × 2 columns

\n", "" ], "text/plain": [ " feature importance\n", - "0 MaxAbsEStateIndex 0.003899\n", - "1 MaxEStateIndex 0.001640\n", - "2 MinAbsEStateIndex 0.002302\n", - "3 MinEStateIndex 0.002898\n", - "4 qed 0.008949\n", + "0 MaxAbsEStateIndex 0.002776\n", + "1 MaxEStateIndex 0.003859\n", + "2 MinAbsEStateIndex 0.006311\n", + "3 MinEStateIndex 0.004721\n", + "4 qed 0.007605\n", ".. ... ...\n", - "204 fr_thiazole 0.000000\n", - "205 fr_thiocyan 0.000000\n", - "206 fr_thiophene 0.000286\n", - "207 fr_unbrch_alkane 0.000020\n", - "208 fr_urea 0.000015\n", + "205 fr_thiazole 0.000000\n", + "206 fr_thiocyan 0.000000\n", + "207 fr_thiophene 0.000046\n", + "208 fr_unbrch_alkane 0.000000\n", + "209 fr_urea 0.000000\n", "\n", - "[209 rows x 2 columns]" + "[210 rows x 2 columns]" ] }, "execution_count": 13, @@ -1503,6 +4341,7 @@ }, { "cell_type": "markdown", + "id": "64ac369d", "metadata": {}, "source": [ "Sort the features by most to least important:" @@ -1511,12 +4350,13 @@ { "cell_type": "code", "execution_count": 14, + "id": "713d24f1", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:17.126745Z", - "iopub.status.busy": "2024-04-12T12:11:17.126312Z", - "iopub.status.idle": "2024-04-12T12:11:17.134339Z", - "shell.execute_reply": "2024-04-12T12:11:17.133744Z" + "iopub.execute_input": "2024-11-24T09:28:37.693255Z", + "iopub.status.busy": "2024-11-24T09:28:37.693046Z", + "iopub.status.idle": "2024-11-24T09:28:37.700756Z", + "shell.execute_reply": "2024-11-24T09:28:37.700214Z" } }, "outputs": [ @@ -1549,27 +4389,27 @@ " \n", " 0\n", " PEOE_VSA6\n", - " 0.145391\n", + " 0.147449\n", " \n", " \n", " 1\n", " VSA_EState5\n", - " 0.087551\n", + " 0.087963\n", " \n", " \n", " 2\n", " MaxAbsPartialCharge\n", - " 0.050707\n", + " 0.057491\n", " \n", " \n", " 3\n", " VSA_EState6\n", - " 0.032544\n", + " 0.034922\n", " \n", " \n", " 4\n", " SlogP_VSA6\n", - " 0.030168\n", + " 0.028875\n", " \n", " \n", " ...\n", @@ -1577,50 +4417,50 @@ " ...\n", " \n", " \n", - " 204\n", 
- " fr_isocyan\n", - " 0.000000\n", - " \n", - " \n", " 205\n", - " fr_isothiocyan\n", + " fr_hdrzine\n", " 0.000000\n", " \n", " \n", " 206\n", - " fr_ketone\n", + " fr_hdrzone\n", " 0.000000\n", " \n", " \n", " 207\n", - " fr_ketone_Topliss\n", + " fr_imidazole\n", " 0.000000\n", " \n", " \n", " 208\n", - " fr_C_S\n", + " fr_imide\n", + " 0.000000\n", + " \n", + " \n", + " 209\n", + " fr_urea\n", " 0.000000\n", " \n", " \n", "\n", - "

209 rows × 2 columns

\n", + "

210 rows × 2 columns

\n", "" ], "text/plain": [ " feature importance\n", - "0 PEOE_VSA6 0.145391\n", - "1 VSA_EState5 0.087551\n", - "2 MaxAbsPartialCharge 0.050707\n", - "3 VSA_EState6 0.032544\n", - "4 SlogP_VSA6 0.030168\n", + "0 PEOE_VSA6 0.147449\n", + "1 VSA_EState5 0.087963\n", + "2 MaxAbsPartialCharge 0.057491\n", + "3 VSA_EState6 0.034922\n", + "4 SlogP_VSA6 0.028875\n", ".. ... ...\n", - "204 fr_isocyan 0.000000\n", - "205 fr_isothiocyan 0.000000\n", - "206 fr_ketone 0.000000\n", - "207 fr_ketone_Topliss 0.000000\n", - "208 fr_C_S 0.000000\n", + "205 fr_hdrzine 0.000000\n", + "206 fr_hdrzone 0.000000\n", + "207 fr_imidazole 0.000000\n", + "208 fr_imide 0.000000\n", + "209 fr_urea 0.000000\n", "\n", - "[209 rows x 2 columns]" + "[210 rows x 2 columns]" ] }, "execution_count": 14, @@ -1636,12 +4476,13 @@ { "cell_type": "code", "execution_count": 15, + "id": "4b97778f", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:17.136673Z", - "iopub.status.busy": "2024-04-12T12:11:17.136477Z", - "iopub.status.idle": "2024-04-12T12:11:17.140656Z", - "shell.execute_reply": "2024-04-12T12:11:17.140049Z" + "iopub.execute_input": "2024-11-24T09:28:37.703238Z", + "iopub.status.busy": "2024-11-24T09:28:37.703004Z", + "iopub.status.idle": "2024-11-24T09:28:37.707129Z", + "shell.execute_reply": "2024-11-24T09:28:37.706557Z" } }, "outputs": [ @@ -1668,6 +4509,7 @@ }, { "cell_type": "markdown", + "id": "f79c93a0", "metadata": {}, "source": [ "## Including external features\n", @@ -1686,12 +4528,13 @@ { "cell_type": "code", "execution_count": 16, + "id": "bf8ddaf9", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:17.143445Z", - "iopub.status.busy": "2024-04-12T12:11:17.142907Z", - "iopub.status.idle": "2024-04-12T12:11:17.189933Z", - "shell.execute_reply": "2024-04-12T12:11:17.189323Z" + "iopub.execute_input": "2024-11-24T09:28:37.709630Z", + "iopub.status.busy": "2024-11-24T09:28:37.709389Z", + "iopub.status.idle": "2024-11-24T09:28:37.755699Z", + 
"shell.execute_reply": "2024-11-24T09:28:37.755108Z" } }, "outputs": [ @@ -2079,6 +4922,7 @@ }, { "cell_type": "markdown", + "id": "92dc6bf5", "metadata": {}, "source": [ "The CDDD features are stored in columns `cddd_1`, `cddd_2`, ..., `cddd_512`. The file has the identifier column `Ambit_InchiKey` that we can use to combine the CDDD features with the rest of the data:" @@ -2087,12 +4931,13 @@ { "cell_type": "code", "execution_count": 17, + "id": "db83be01", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:17.192512Z", - "iopub.status.busy": "2024-04-12T12:11:17.192295Z", - "iopub.status.idle": "2024-04-12T12:11:17.203133Z", - "shell.execute_reply": "2024-04-12T12:11:17.202544Z" + "iopub.execute_input": "2024-11-24T09:28:37.758200Z", + "iopub.status.busy": "2024-11-24T09:28:37.757994Z", + "iopub.status.idle": "2024-11-24T09:28:37.769048Z", + "shell.execute_reply": "2024-11-24T09:28:37.768408Z" } }, "outputs": [], @@ -2114,12 +4959,13 @@ { "cell_type": "code", "execution_count": 18, + "id": "dae995b7", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:17.205740Z", - "iopub.status.busy": "2024-04-12T12:11:17.205517Z", - "iopub.status.idle": "2024-04-12T12:11:17.209117Z", - "shell.execute_reply": "2024-04-12T12:11:17.208573Z" + "iopub.execute_input": "2024-11-24T09:28:37.771772Z", + "iopub.status.busy": "2024-11-24T09:28:37.771516Z", + "iopub.status.idle": "2024-11-24T09:28:37.775128Z", + "shell.execute_reply": "2024-11-24T09:28:37.774567Z" } }, "outputs": [], @@ -2134,6 +4980,7 @@ }, { "cell_type": "markdown", + "id": "2ec82fc8", "metadata": {}, "source": [ "Now we can define a pipeline that uses the original SMILES column to compute the descriptors available in scikit-mol, then concatenates them with the pre-computed CDDD features, and uses all of them to train the regression model. We will need a slightly more complex pipeline with column selectors and transformers. 
For more details on this technique, please refer to the [official documentation](https://scikit-learn.org/stable/modules/generated/sklearn.compose.make_column_selector.html).\n", @@ -2144,19 +4991,424 @@ { "cell_type": "code", "execution_count": 19, + "id": "dc6de049", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:17.211432Z", - "iopub.status.busy": "2024-04-12T12:11:17.211209Z", - "iopub.status.idle": "2024-04-12T12:11:17.241254Z", - "shell.execute_reply": "2024-04-12T12:11:17.240639Z" + "iopub.execute_input": "2024-11-24T09:28:37.777567Z", + "iopub.status.busy": "2024-11-24T09:28:37.777354Z", + "iopub.status.idle": "2024-11-24T09:28:37.808615Z", + "shell.execute_reply": "2024-11-24T09:28:37.808024Z" } }, "outputs": [ { "data": { "text/html": [ - "
ColumnTransformer(transformers=[('pipeline-1',\n",
+       "
ColumnTransformer(transformers=[('pipeline-1',\n",
        "                                 Pipeline(steps=[('smilestomoltransformer',\n",
        "                                                  SmilesToMolTransformer()),\n",
        "                                                 ('standardizer',\n",
@@ -2167,10 +5419,11 @@
        "                                                                                            'MinAbsEStateIndex',\n",
        "                                                                                            'MinEStateIndex',\n",
        "                                                                                            'qed',\n",
+       "                                                                                            'SPS',\n",
        "                                                                                            'MolWt',\n",
        "                                                                                            'HeavyAtomMolWt',\n",
        "                                                                                            'ExactMolWt',\n",
-       "                                                                                            'NumValenc...\n",
+       "                                                                                            'Num...\n",
        "                                                                                            'BCUT2D_LOGPHI',\n",
        "                                                                                            'BCUT2D_LOGPLOW',\n",
        "                                                                                            'BCUT2D_MRHI',\n",
@@ -2178,13 +5431,12 @@
        "                                                                                            'AvgIpc',\n",
        "                                                                                            'BalabanJ',\n",
        "                                                                                            'BertzCT',\n",
-       "                                                                                            'Chi0',\n",
-       "                                                                                            'Chi0n', ...]))]),\n",
-       "                                 <sklearn.compose._column_transformer.make_column_selector object at 0x7d90d7ff6e30>),\n",
+       "                                                                                            'Chi0', ...]))]),\n",
+       "                                 <sklearn.compose._column_transformer.make_column_selector object at 0x729f1412c520>),\n",
        "                                ('pipeline-2',\n",
        "                                 Pipeline(steps=[('functiontransformer',\n",
        "                                                  FunctionTransformer())]),\n",
-       "                                 <sklearn.compose._column_transformer.make_column_selector object at 0x7d90d7ff6a40>)])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
<sklearn.compose._column_transformer.make_column_selector object at 0x729f1412ebf0>
FunctionTransformer()
" ], "text/plain": [ "ColumnTransformer(transformers=[('pipeline-1',\n", @@ -2242,10 +5495,11 @@ " 'MinAbsEStateIndex',\n", " 'MinEStateIndex',\n", " 'qed',\n", + " 'SPS',\n", " 'MolWt',\n", " 'HeavyAtomMolWt',\n", " 'ExactMolWt',\n", - " 'NumValenc...\n", + " 'Num...\n", " 'BCUT2D_LOGPHI',\n", " 'BCUT2D_LOGPLOW',\n", " 'BCUT2D_MRHI',\n", @@ -2253,13 +5507,12 @@ " 'AvgIpc',\n", " 'BalabanJ',\n", " 'BertzCT',\n", - " 'Chi0',\n", - " 'Chi0n', ...]))]),\n", - " ),\n", + " 'Chi0', ...]))]),\n", + " ),\n", " ('pipeline-2',\n", " Pipeline(steps=[('functiontransformer',\n", " FunctionTransformer())]),\n", - " )])" + " )])" ] }, "execution_count": 19, @@ -2290,27 +5543,424 @@ { "cell_type": "code", "execution_count": 20, + "id": "6ee85c3c", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:17.243604Z", - "iopub.status.busy": "2024-04-12T12:11:17.243400Z", - "iopub.status.idle": "2024-04-12T12:11:17.309327Z", - "shell.execute_reply": "2024-04-12T12:11:17.308716Z" + "iopub.execute_input": "2024-11-24T09:28:37.811016Z", + "iopub.status.busy": "2024-11-24T09:28:37.810811Z", + "iopub.status.idle": "2024-11-24T09:28:37.883600Z", + "shell.execute_reply": "2024-11-24T09:28:37.882894Z" } }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/esben/python_envs/vscode/lib/python3.10/site-packages/sklearn/preprocessing/_function_transformer.py:345: UserWarning: With transform=\"pandas\", `func` should return a DataFrame to follow the set_output API.\n", - " warnings.warn(\n" - ] - }, { "data": { "text/html": [ - "
Pipeline(steps=[('columntransformer',\n",
+       "
Pipeline(steps=[('columntransformer',\n",
        "                 ColumnTransformer(transformers=[('pipeline-1',\n",
        "                                                  Pipeline(steps=[('smilestomoltransformer',\n",
        "                                                                   SmilesToMolTransformer()),\n",
@@ -2322,16 +5972,16 @@
        "                                                                                                             'MinAbsEStateIndex',\n",
        "                                                                                                             'MinEStateIndex',\n",
        "                                                                                                             'qed',\n",
-       "                                                                                                             'MolWt',\n",
-       "                                                                                                             'He...\n",
-       "                                                                                                             'Chi0n', ...]))]),\n",
-       "                                                  <sklearn.compose._column_transformer.make_column_selector object at 0x7d90d7ff6e30>),\n",
+       "                                                                                                             'SPS',\n",
+       "                                                                                                             'MolW...\n",
+       "                                                                                                             'Chi0', ...]))]),\n",
+       "                                                  <sklearn.compose._column_transformer.make_column_selector object at 0x729f1412c520>),\n",
        "                                                 ('pipeline-2',\n",
        "                                                  Pipeline(steps=[('functiontransformer',\n",
        "                                                                   FunctionTransformer())]),\n",
-       "                                                  <sklearn.compose._column_transformer.make_column_selector object at 0x7d90d7ff6a40>)])),\n",
+       "                                                  <sklearn.compose._column_transformer.make_column_selector object at 0x729f1412ebf0>)])),\n",
        "                ('standardscaler', StandardScaler()),\n",
-       "                ('randomforestregressor', RandomForestRegressor(max_depth=5))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
<sklearn.compose._column_transformer.make_column_selector object at 0x729f1412ebf0>
FunctionTransformer()
StandardScaler()
RandomForestRegressor(max_depth=5)
" ], "text/plain": [ "Pipeline(steps=[('columntransformer',\n", @@ -2411,14 +6062,14 @@ " 'MinAbsEStateIndex',\n", " 'MinEStateIndex',\n", " 'qed',\n", - " 'MolWt',\n", - " 'He...\n", - " 'Chi0n', ...]))]),\n", - " ),\n", + " 'SPS',\n", + " 'MolW...\n", + " 'Chi0', ...]))]),\n", + " ),\n", " ('pipeline-2',\n", " Pipeline(steps=[('functiontransformer',\n", " FunctionTransformer())]),\n", - " )])),\n", + " )])),\n", " ('standardscaler', StandardScaler()),\n", " ('randomforestregressor', RandomForestRegressor(max_depth=5))])" ] @@ -2440,21 +6091,672 @@ { "cell_type": "code", "execution_count": 21, + "id": "03960958", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:17.311956Z", - "iopub.status.busy": "2024-04-12T12:11:17.311703Z", - "iopub.status.idle": "2024-04-12T12:11:22.426687Z", - "shell.execute_reply": "2024-04-12T12:11:22.426127Z" + "iopub.execute_input": "2024-11-24T09:28:37.886041Z", + "iopub.status.busy": "2024-11-24T09:28:37.885822Z", + "iopub.status.idle": "2024-11-24T09:28:42.859220Z", + "shell.execute_reply": "2024-11-24T09:28:42.858489Z" } }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: 
please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", 
+ "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] 
DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION 
WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: 
please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:38] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", 
+ "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] 
DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION 
WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: 
please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", 
+ "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] 
DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:39] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION 
WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use 
MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", 
+ "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "[10:28:42] DEPRECATION WARNING: please use MorganGenerator\n", + "/home/esben/python_envs/vscode/lib/python3.10/site-packages/sklearn/metrics/_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n", + " warnings.warn(\n" + ] + }, { "data": { "text/plain": [ - "{'RMSE': 0.8103900599888356,\n", - " 'MAE': 0.686626458034167,\n", - " 'R2': 0.2498289359739927}" + "{'RMSE': 0.8314055216871027,\n", + " 'MAE': 0.7061918187521163,\n", + " 'R2': 0.2104167870060334}" ] }, "execution_count": 21, @@ -2471,6 +6773,7 @@ }, { "cell_type": "markdown", + "id": "c49ecd90", "metadata": {}, "source": [ "Let's combine the performance metrics obtained using only the scikit-mol descriptors as input features, and the performance metrics obtained using also the CDDD features:" @@ -2479,12 +6782,13 @@ { "cell_type": "code", "execution_count": 22, + "id": "6ce2fe53", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:22.429148Z", - "iopub.status.busy": "2024-04-12T12:11:22.428932Z", - "iopub.status.idle": "2024-04-12T12:11:22.436592Z", - "shell.execute_reply": "2024-04-12T12:11:22.435962Z" + "iopub.execute_input": "2024-11-24T09:28:42.861769Z", + "iopub.status.busy": "2024-11-24T09:28:42.861505Z", + "iopub.status.idle": "2024-11-24T09:28:42.869169Z", + "shell.execute_reply": "2024-11-24T09:28:42.868553Z" } }, "outputs": [ @@ -2517,15 +6821,15 @@ " \n", " \n", " descriptors\n", - " 0.875023\n", - " 0.722710\n", - " 0.119470\n", + " 0.873696\n", + " 0.707222\n", + " 0.122139\n", " \n", " \n", " combined\n", - " 
0.810390\n", - " 0.686626\n", - " 0.249829\n", + " 0.831406\n", + " 0.706192\n", + " 0.210417\n", " \n", " \n", "\n", @@ -2533,8 +6837,8 @@ ], "text/plain": [ " RMSE MAE R2\n", - "descriptors 0.875023 0.722710 0.119470\n", - "combined 0.810390 0.686626 0.249829" + "descriptors 0.873696 0.707222 0.122139\n", + "combined 0.831406 0.706192 0.210417" ] }, "execution_count": 22, @@ -2549,6 +6853,7 @@ }, { "cell_type": "markdown", + "id": "83b7fd13", "metadata": {}, "source": [ "All performance metrics were improved by the includion of the CDDD features.\n", @@ -2558,12 +6863,13 @@ { "cell_type": "code", "execution_count": 23, + "id": "9c98ac71", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:22.439263Z", - "iopub.status.busy": "2024-04-12T12:11:22.438951Z", - "iopub.status.idle": "2024-04-12T12:11:22.453688Z", - "shell.execute_reply": "2024-04-12T12:11:22.453096Z" + "iopub.execute_input": "2024-11-24T09:28:42.872116Z", + "iopub.status.busy": "2024-11-24T09:28:42.871685Z", + "iopub.status.idle": "2024-11-24T09:28:42.886733Z", + "shell.execute_reply": "2024-11-24T09:28:42.886003Z" } }, "outputs": [ @@ -2595,28 +6901,28 @@ " \n", " \n", " 0\n", - " pipeline-2__cddd_102\n", - " 0.077721\n", + " pipeline-1__PEOE_VSA6\n", + " 0.078597\n", " \n", " \n", " 1\n", - " pipeline-1__PEOE_VSA6\n", - " 0.060011\n", + " pipeline-2__cddd_102\n", + " 0.064366\n", " \n", " \n", " 2\n", " pipeline-2__cddd_378\n", - " 0.042489\n", + " 0.045695\n", " \n", " \n", " 3\n", - " pipeline-2__cddd_369\n", - " 0.030706\n", + " pipeline-1__VSA_EState5\n", + " 0.032759\n", " \n", " \n", " 4\n", - " pipeline-1__VSA_EState5\n", - " 0.026225\n", + " pipeline-2__cddd_369\n", + " 0.030738\n", " \n", " \n", " ...\n", @@ -2624,50 +6930,50 @@ " ...\n", " \n", " \n", - " 716\n", - " pipeline-1__SMR_VSA5\n", - " 0.000000\n", - " \n", - " \n", " 717\n", - " pipeline-1__SMR_VSA8\n", + " pipeline-1__fr_lactam\n", " 0.000000\n", " \n", " \n", " 718\n", - " pipeline-1__RingCount\n", + " 
pipeline-1__fr_NH2\n", " 0.000000\n", " \n", " \n", " 719\n", - " pipeline-1__fr_isocyan\n", + " pipeline-1__SMR_VSA2\n", " 0.000000\n", " \n", " \n", " 720\n", - " pipeline-1__fr_azide\n", + " pipeline-1__fr_Imine\n", + " 0.000000\n", + " \n", + " \n", + " 721\n", + " pipeline-1__fr_phos_acid\n", " 0.000000\n", " \n", " \n", "\n", - "

721 rows × 2 columns

\n", + "

722 rows × 2 columns

\n", "" ], "text/plain": [ - " feature importance\n", - "0 pipeline-2__cddd_102 0.077721\n", - "1 pipeline-1__PEOE_VSA6 0.060011\n", - "2 pipeline-2__cddd_378 0.042489\n", - "3 pipeline-2__cddd_369 0.030706\n", - "4 pipeline-1__VSA_EState5 0.026225\n", - ".. ... ...\n", - "716 pipeline-1__SMR_VSA5 0.000000\n", - "717 pipeline-1__SMR_VSA8 0.000000\n", - "718 pipeline-1__RingCount 0.000000\n", - "719 pipeline-1__fr_isocyan 0.000000\n", - "720 pipeline-1__fr_azide 0.000000\n", - "\n", - "[721 rows x 2 columns]" + " feature importance\n", + "0 pipeline-1__PEOE_VSA6 0.078597\n", + "1 pipeline-2__cddd_102 0.064366\n", + "2 pipeline-2__cddd_378 0.045695\n", + "3 pipeline-1__VSA_EState5 0.032759\n", + "4 pipeline-2__cddd_369 0.030738\n", + ".. ... ...\n", + "717 pipeline-1__fr_lactam 0.000000\n", + "718 pipeline-1__fr_NH2 0.000000\n", + "719 pipeline-1__SMR_VSA2 0.000000\n", + "720 pipeline-1__fr_Imine 0.000000\n", + "721 pipeline-1__fr_phos_acid 0.000000\n", + "\n", + "[722 rows x 2 columns]" ] }, "execution_count": 23, @@ -2684,12 +6990,13 @@ { "cell_type": "code", "execution_count": 24, + "id": "9dbd2a9e", "metadata": { "execution": { - "iopub.execute_input": "2024-04-12T12:11:22.456280Z", - "iopub.status.busy": "2024-04-12T12:11:22.456047Z", - "iopub.status.idle": "2024-04-12T12:11:22.460105Z", - "shell.execute_reply": "2024-04-12T12:11:22.459602Z" + "iopub.execute_input": "2024-11-24T09:28:42.889250Z", + "iopub.status.busy": "2024-11-24T09:28:42.889020Z", + "iopub.status.idle": "2024-11-24T09:28:42.893106Z", + "shell.execute_reply": "2024-11-24T09:28:42.892486Z" } }, "outputs": [ @@ -2698,11 +7005,11 @@ "output_type": "stream", "text": [ "The 5 most important features are:\n", - "pipeline-2__cddd_102\n", "pipeline-1__PEOE_VSA6\n", + "pipeline-2__cddd_102\n", "pipeline-2__cddd_378\n", - "pipeline-2__cddd_369\n", - "pipeline-1__VSA_EState5\n" + "pipeline-1__VSA_EState5\n", + "pipeline-2__cddd_369\n" ] } ], @@ -2715,6 +7022,7 @@ }, { "cell_type": "markdown", + "id": 
"7b394662", "metadata": {}, "source": [ "As we can see, some CDDD features are among the most important features for the regression model.\n", @@ -2746,5 +7054,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 5 } diff --git a/notebooks/11_safe_inference.ipynb b/notebooks/11_safe_inference.ipynb index 6ee786e..93859ae 100644 --- a/notebooks/11_safe_inference.ipynb +++ b/notebooks/11_safe_inference.ipynb @@ -2,6 +2,7 @@ "cells": [ { "cell_type": "markdown", + "id": "f34dacf0", "metadata": {}, "source": [ "# Safe inference mode\n", @@ -15,22 +16,30 @@ }, { "cell_type": "code", - "execution_count": 12, - "metadata": {}, + "execution_count": 1, + "id": "ac780f4c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-24T09:28:44.417205Z", + "iopub.status.busy": "2024-11-24T09:28:44.417002Z", + "iopub.status.idle": "2024-11-24T09:28:45.205864Z", + "shell.execute_reply": "2024-11-24T09:28:45.205244Z" + } + }, "outputs": [ { "data": { "text/plain": [ - "array([[],\n", - " [],\n", - " [],\n", - " [],\n", + "array([[],\n", + " [],\n", + " [],\n", + " [],\n", " [InvalidMol('SmilesToMolTransformer(safe_inference_mode=True)', error='Invalid Molecule: Explicit valence for atom # 0 N, 4, is greater than permitted')],\n", " [InvalidMol('SmilesToMolTransformer(safe_inference_mode=True)', error='Invalid SMILES: I'm not a SMILES')]],\n", " dtype=object)" ] }, - "execution_count": 12, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } @@ -39,9 +48,10 @@ "from rdkit import Chem\n", "from scikit_mol.conversions import SmilesToMolTransformer\n", "\n", - "#We have some deprecation warnings, we are adressing them, but they just distract from this demonstration\n", + "# We have some deprecation warnings, we are adressing them, but they just distract from this demonstration\n", "import warnings\n", - "warnings.filterwarnings(\"ignore\", category=DeprecationWarning) \n", + "\n", + "warnings.filterwarnings(\"ignore\", 
category=DeprecationWarning)\n", "\n", "smiles = [\"C1=CC=C(C=C1)F\", \"C1=CC=C(C=C1)O\", \"C1=CC=C(C=C1)N\", \"C1=CC=C(C=C1)Cl\"]\n", "smiles_with_invalid = smiles + [\"N(C)(C)(C)C\", \"I'm not a SMILES\"]\n", @@ -54,6 +64,7 @@ }, { "cell_type": "markdown", + "id": "bdd18682", "metadata": {}, "source": [ "Without the safe inference mode, the transformation would simply fail, but now we get the expected array back with our RDKit molecules and a last entry which is an object of the type InvalidMol. InvalidMol is simply a placeholder that tells what step failed the conversion and the error. InvalidMol evaluates to `False` in boolean contexts, so it gets easy to filter away and handle in `if`s and list comprehensions. As example:" @@ -61,19 +72,27 @@ }, { "cell_type": "code", - "execution_count": 13, - "metadata": {}, + "execution_count": 2, + "id": "44a6019c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-24T09:28:45.208884Z", + "iopub.status.busy": "2024-11-24T09:28:45.208436Z", + "iopub.status.idle": "2024-11-24T09:28:45.213259Z", + "shell.execute_reply": "2024-11-24T09:28:45.212730Z" + } + }, "outputs": [ { "data": { "text/plain": [ - "[array([], dtype=object),\n", - " array([], dtype=object),\n", - " array([], dtype=object),\n", - " array([], dtype=object)]" + "[array([], dtype=object),\n", + " array([], dtype=object),\n", + " array([], dtype=object),\n", + " array([], dtype=object)]" ] }, - "execution_count": 13, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -84,6 +103,7 @@ }, { "cell_type": "markdown", + "id": "176a44de", "metadata": {}, "source": [ "or" @@ -91,19 +111,27 @@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": {}, + "execution_count": 3, + "id": "8286fd44", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-24T09:28:45.215847Z", + "iopub.status.busy": "2024-11-24T09:28:45.215431Z", + "iopub.status.idle": "2024-11-24T09:28:45.219372Z", + "shell.execute_reply": 
"2024-11-24T09:28:45.218875Z" + } + }, "outputs": [ { "data": { "text/plain": [ - "array([,\n", - " ,\n", - " ,\n", - " ], dtype=object)" + "array([,\n", + " ,\n", + " ,\n", + " ], dtype=object)" ] }, - "execution_count": 14, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -115,6 +143,7 @@ }, { "cell_type": "markdown", + "id": "c7be8909", "metadata": {}, "source": [ "Having a failsafe SmilesToMol conversion leads us to next step, featurization. The transformers in safe inference mode now return a NumPy masked array instead of a regular NumPy array. It simply evaluates the incoming mols in a boolean context, so e.g. `None`, `np.nan` and other Python objects that evaluates to False will also get masked (i.e. if you use a dataframe with an ROMol column produced with the PandasTools utility)" @@ -122,31 +151,30 @@ }, { "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n" - ] + "execution_count": 4, + "id": "9a705642", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-24T09:28:45.221712Z", + "iopub.status.busy": "2024-11-24T09:28:45.221465Z", + "iopub.status.idle": "2024-11-24T09:28:45.246566Z", + "shell.execute_reply": "2024-11-24T09:28:45.245960Z" }, + "lines_to_next_cell": 2 + }, + "outputs": [ { "data": { "text/plain": [ "masked_array(\n", - " data=[[0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1,\n", - " 0, 1, 1, 0],\n", - " [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1,\n", - " 0, 0, 1, 0],\n", - " [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1,\n", - " 0, 0, 0, 0],\n", - " [1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 
0, 0, 1, 1,\n", - " 0, 1, 0, 1],\n", + " data=[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0,\n", + " 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0],\n", + " [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0,\n", + " 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],\n", + " [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0,\n", + " 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0],\n", + " [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0,\n", + " 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0],\n", " [--, --, --, --, --, --, --, --, --, --, --, --, --, --, --, --,\n", " --, --, --, --, --, --, --, --, --],\n", " [--, --, --, --, --, --, --, --, --, --, --, --, --, --, --, --,\n", @@ -169,11 +197,10 @@ " [ True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True, True, True,\n", " True, True, True, True, True, True, True]],\n", - " fill_value=999999,\n", - " dtype=int8)" + " fill_value=1e+20)" ] }, - "execution_count": 15, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -181,13 +208,14 @@ "source": [ "from scikit_mol.fingerprints import MorganFingerprintTransformer\n", "\n", - "mfp = MorganFingerprintTransformer(radius=2, nBits=25, safe_inference_mode=True)\n", + "mfp = MorganFingerprintTransformer(radius=2, fpSize=25, safe_inference_mode=True)\n", "fps = mfp.transform(mols_with_invalid)\n", - "fps\n" + "fps" ] }, { "cell_type": "markdown", + "id": "a5e2b301", "metadata": {}, "source": [ "However, currently scikit-learn models accepts masked arrays, but they do not respect the mask! So if you fed it directly to the model to train, it would seemingly work, but the invalid samples would all have the fill_value, meaning you could get weird results. Instead we need the last part of the puzzle, the SafeInferenceWrapper class." 
@@ -195,8 +223,17 @@ }, { "cell_type": "code", - "execution_count": 16, - "metadata": {}, + "execution_count": 5, + "id": "37987dc9", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-24T09:28:45.249048Z", + "iopub.status.busy": "2024-11-24T09:28:45.248844Z", + "iopub.status.idle": "2024-11-24T09:28:45.318911Z", + "shell.execute_reply": "2024-11-24T09:28:45.318291Z" + }, + "lines_to_next_cell": 2 + }, "outputs": [ { "name": "stderr", @@ -212,7 +249,7 @@ "array([ 0., 1., 0., 1., nan, nan])" ] }, - "execution_count": 16, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -224,17 +261,19 @@ "\n", "regressor = LogisticRegression()\n", "wrapper = SafeInferenceWrapper(regressor, safe_inference_mode=True)\n", - "wrapper.fit(fps, [0,1,0,1,0,1])\n", - "wrapper.predict(fps)\n" + "wrapper.fit(fps, [0, 1, 0, 1, 0, 1])\n", + "wrapper.predict(fps)" ] }, { "cell_type": "markdown", + "id": "7aa1223f", "metadata": {}, "source": [] }, { "cell_type": "markdown", + "id": "f08d26d5", "metadata": {}, "source": [ "The prediction went fine both in fit and in prediction, where the result shows `nan` for the invalid entries. However, please note fit in sage_inference_mode is not recommended in a training session, but you are warned and not blocked, because maybe you know what you do and do it on purpose.\n", @@ -246,8 +285,16 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": {}, + "execution_count": 6, + "id": "51436aa8", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-24T09:28:45.321557Z", + "iopub.status.busy": "2024-11-24T09:28:45.321253Z", + "iopub.status.idle": "2024-11-24T09:28:45.333442Z", + "shell.execute_reply": "2024-11-24T09:28:45.332830Z" + } + }, "outputs": [ { "name": "stdout", @@ -259,33 +306,21 @@ "With safe inference mode:\n", "[ 1. 0. 1. 0. 
nan nan]\n" ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n" - ] } ], "source": [ "from scikit_mol.safeinference import set_safe_inference_mode\n", "from sklearn.pipeline import Pipeline\n", "\n", - "pipe = Pipeline([\n", - " (\"smi2mol\", SmilesToMolTransformer()),\n", - " (\"mfp\", MorganFingerprintTransformer(radius=2, nBits=25)),\n", - " (\"safe_regressor\", SafeInferenceWrapper(LogisticRegression()))\n", - "])\n", + "pipe = Pipeline(\n", + " [\n", + " (\"smi2mol\", SmilesToMolTransformer()),\n", + " (\"mfp\", MorganFingerprintTransformer(radius=2, fpSize=25)),\n", + " (\"safe_regressor\", SafeInferenceWrapper(LogisticRegression())),\n", + " ]\n", + ")\n", "\n", - "pipe.fit(smiles, [1,0,1,0])\n", + "pipe.fit(smiles, [1, 0, 1, 0])\n", "\n", "print(\"Without safe inference mode:\")\n", "try:\n", @@ -302,6 +337,7 @@ }, { "cell_type": "markdown", + "id": "cf53d58f", "metadata": {}, "source": [ "We see that the prediction fail without safe inference mode, and proceeds when it's conveniently set by the `set_safe_inference_mode` utility. 
The model is now ready for save and reuse in a more failsafe manner :-)" @@ -309,6 +345,7 @@ }, { "cell_type": "markdown", + "id": "685e22fd", "metadata": {}, "source": [ "## Combining safe_inference_mode with pandas output\n", @@ -317,19 +354,17 @@ }, { "cell_type": "code", - "execution_count": 18, - "metadata": {}, + "execution_count": 7, + "id": "b8dbd88c", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-24T09:28:45.336071Z", + "iopub.status.busy": "2024-11-24T09:28:45.335859Z", + "iopub.status.idle": "2024-11-24T09:28:45.351873Z", + "shell.execute_reply": "2024-11-24T09:28:45.351251Z" + } + }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n" - ] - }, { "data": { "text/html": [ @@ -504,7 +539,7 @@ "[4 rows x 25 columns]" ] }, - "execution_count": 18, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -520,6 +555,7 @@ }, { "cell_type": "markdown", + "id": "092ca859", "metadata": {}, "source": [ "Then lets see if we transform a batch with an invalid molecule:" @@ -527,19 +563,17 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": {}, + "execution_count": 8, + "id": "710ceeb0", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-24T09:28:45.354427Z", + "iopub.status.busy": "2024-11-24T09:28:45.354176Z", + "iopub.status.idle": "2024-11-24T09:28:45.377892Z", + "shell.execute_reply": "2024-11-24T09:28:45.377253Z" + } + }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] 
DEPRECATION WARNING: please use MorganGenerator\n" - ] - }, { "data": { "text/html": [ @@ -770,7 +804,7 @@ "[6 rows x 25 columns]" ] }, - "execution_count": 19, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -782,24 +816,31 @@ }, { "cell_type": "markdown", + "id": "b87b46b3", "metadata": {}, "source": [ - "The second output is no longer integers, but floats. As most sklearn models cast input arrays to float32 internally, this difference is likely benign, but that's not guaranteed! Thus if you want to use pandas output for your production models, do check that the final outputs are the same for the valid rows, with and without a single invalid row. Alternatively the dtype for the output of the transformer can be switched to float for consistency." + "The second output is no longer integers, but floats. As most sklearn models cast input arrays to float32 internally, this difference is likely benign, but that's not guaranteed! Thus if you want to use pandas output for your production models, do check that the final outputs are the same for the valid rows, with and without a single invalid row. Alternatively the dtype for the output of the transformer can be switched to float for consistency if its supported by the transformer." 
] }, { "cell_type": "code", - "execution_count": 20, - "metadata": {}, + "execution_count": 9, + "id": "bbfe1ec0", + "metadata": { + "execution": { + "iopub.execute_input": "2024-11-24T09:28:45.380434Z", + "iopub.status.busy": "2024-11-24T09:28:45.380233Z", + "iopub.status.idle": "2024-11-24T09:28:45.393639Z", + "shell.execute_reply": "2024-11-24T09:28:45.393095Z" + } + }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n", - "[17:02:50] DEPRECATION WARNING: please use MorganGenerator\n" + "/home/esben/git/scikit-mol/scikit_mol/fingerprints/morgan.py:69: DeprecationWarning: dtype is no longer supported, due to move to generator based fingerprints\n", + " self.dtype = dtype\n" ] }, { @@ -849,99 +890,99 @@ " \n", " \n", " 0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", - " 1.0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 1\n", + " 1\n", " ...\n", - " 0.0\n", - " 1.0\n", - " 0.0\n", - " 1.0\n", - " 1.0\n", - " 1.0\n", - " 0.0\n", - " 1.0\n", - " 1.0\n", - " 0.0\n", + " 0\n", + " 1\n", + " 0\n", + " 1\n", + " 1\n", + " 1\n", + " 0\n", + " 1\n", + " 1\n", + " 0\n", " \n", " \n", " 1\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", - " 1.0\n", - " 1.0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 1\n", + " 1\n", + " 1\n", " ...\n", - " 0.0\n", - " 1.0\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", - " 1.0\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", - " 0.0\n", + " 0\n", + " 1\n", + " 0\n", + " 0\n", + " 1\n", + " 1\n", + " 0\n", + " 0\n", + " 1\n", + " 0\n", " \n", " \n", " 2\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 
1.0\n", - " 1.0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 1\n", + " 1\n", " ...\n", - " 0.0\n", - " 1.0\n", - " 0.0\n", - " 1.0\n", - " 1.0\n", - " 1.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 0\n", + " 1\n", + " 0\n", + " 1\n", + " 1\n", + " 1\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", " \n", " \n", " 3\n", - " 1.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", - " 1.0\n", + " 1\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 0\n", + " 1\n", + " 1\n", " ...\n", - " 0.0\n", - " 1.0\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", - " 1.0\n", - " 0.0\n", - " 1.0\n", - " 0.0\n", - " 1.0\n", + " 0\n", + " 1\n", + " 0\n", + " 0\n", + " 1\n", + " 1\n", + " 0\n", + " 1\n", + " 0\n", + " 1\n", " \n", " \n", "\n", @@ -950,39 +991,41 @@ ], "text/plain": [ " fp_morgan_1 fp_morgan_2 fp_morgan_3 fp_morgan_4 fp_morgan_5 \\\n", - "0 0.0 0.0 0.0 0.0 0.0 \n", - "1 0.0 0.0 0.0 0.0 0.0 \n", - "2 0.0 0.0 0.0 0.0 0.0 \n", - "3 1.0 0.0 0.0 0.0 0.0 \n", + "0 0 0 0 0 0 \n", + "1 0 0 0 0 0 \n", + "2 0 0 0 0 0 \n", + "3 1 0 0 0 0 \n", "\n", " fp_morgan_6 fp_morgan_7 fp_morgan_8 fp_morgan_9 fp_morgan_10 ... \\\n", - "0 0.0 0.0 0.0 1.0 1.0 ... \n", - "1 0.0 0.0 1.0 1.0 1.0 ... \n", - "2 0.0 0.0 0.0 1.0 1.0 ... \n", - "3 0.0 0.0 0.0 1.0 1.0 ... \n", + "0 0 0 0 1 1 ... \n", + "1 0 0 1 1 1 ... \n", + "2 0 0 0 1 1 ... \n", + "3 0 0 0 1 1 ... 
\n", "\n", " fp_morgan_16 fp_morgan_17 fp_morgan_18 fp_morgan_19 fp_morgan_20 \\\n", - "0 0.0 1.0 0.0 1.0 1.0 \n", - "1 0.0 1.0 0.0 0.0 1.0 \n", - "2 0.0 1.0 0.0 1.0 1.0 \n", - "3 0.0 1.0 0.0 0.0 1.0 \n", + "0 0 1 0 1 1 \n", + "1 0 1 0 0 1 \n", + "2 0 1 0 1 1 \n", + "3 0 1 0 0 1 \n", "\n", " fp_morgan_21 fp_morgan_22 fp_morgan_23 fp_morgan_24 fp_morgan_25 \n", - "0 1.0 0.0 1.0 1.0 0.0 \n", - "1 1.0 0.0 0.0 1.0 0.0 \n", - "2 1.0 0.0 0.0 0.0 0.0 \n", - "3 1.0 0.0 1.0 0.0 1.0 \n", + "0 1 0 1 1 0 \n", + "1 1 0 0 1 0 \n", + "2 1 0 0 0 0 \n", + "3 1 0 1 0 1 \n", "\n", "[4 rows x 25 columns]" ] }, - "execution_count": 20, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "mfp_float = MorganFingerprintTransformer(radius=2, nBits=25, safe_inference_mode=True, dtype=np.float32)\n", + "mfp_float = MorganFingerprintTransformer(\n", + " radius=2, fpSize=25, safe_inference_mode=True, dtype=np.float32\n", + ")\n", "mfp_float.set_output(transform=\"pandas\")\n", "fps = mfp_float.transform(mols)\n", "fps" @@ -990,6 +1033,7 @@ }, { "cell_type": "markdown", + "id": "2c7b382c", "metadata": {}, "source": [ "I hope this new feature of Scikit-Mol will make it even easier to handle models, even when used in environments without SMILES or molecule validity guarantees." 
@@ -1019,5 +1063,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 5 } diff --git a/notebooks/11_safe_inference.py b/notebooks/11_safe_inference.py index 83d4d99..14b97f7 100644 --- a/notebooks/11_safe_inference.py +++ b/notebooks/11_safe_inference.py @@ -26,9 +26,10 @@ from rdkit import Chem from scikit_mol.conversions import SmilesToMolTransformer -#We have some deprecation warnings, we are adressing them, but they just distract from this demonstration +# We have some deprecation warnings, we are adressing them, but they just distract from this demonstration import warnings -warnings.filterwarnings("ignore", category=DeprecationWarning) + +warnings.filterwarnings("ignore", category=DeprecationWarning) smiles = ["C1=CC=C(C=C1)F", "C1=CC=C(C=C1)O", "C1=CC=C(C=C1)N", "C1=CC=C(C=C1)Cl"] smiles_with_invalid = smiles + ["N(C)(C)(C)C", "I'm not a SMILES"] @@ -57,7 +58,7 @@ # %% from scikit_mol.fingerprints import MorganFingerprintTransformer -mfp = MorganFingerprintTransformer(radius=2, nBits=25, safe_inference_mode=True) +mfp = MorganFingerprintTransformer(radius=2, fpSize=25, safe_inference_mode=True) fps = mfp.transform(mols_with_invalid) fps @@ -72,7 +73,7 @@ regressor = LogisticRegression() wrapper = SafeInferenceWrapper(regressor, safe_inference_mode=True) -wrapper.fit(fps, [0,1,0,1,0,1]) +wrapper.fit(fps, [0, 1, 0, 1, 0, 1]) wrapper.predict(fps) @@ -90,13 +91,15 @@ from scikit_mol.safeinference import set_safe_inference_mode from sklearn.pipeline import Pipeline -pipe = Pipeline([ - ("smi2mol", SmilesToMolTransformer()), - ("mfp", MorganFingerprintTransformer(radius=2, nBits=25)), - ("safe_regressor", SafeInferenceWrapper(LogisticRegression())) -]) +pipe = Pipeline( + [ + ("smi2mol", SmilesToMolTransformer()), + ("mfp", MorganFingerprintTransformer(radius=2, fpSize=25)), + ("safe_regressor", SafeInferenceWrapper(LogisticRegression())), + ] +) -pipe.fit(smiles, [1,0,1,0]) +pipe.fit(smiles, [1, 0, 1, 0]) print("Without safe inference mode:") try: 
@@ -133,10 +136,12 @@ fps # %% [markdown] -# The second output is no longer integers, but floats. As most sklearn models cast input arrays to float32 internally, this difference is likely benign, but that's not guaranteed! Thus if you want to use pandas output for your production models, do check that the final outputs are the same for the valid rows, with and without a single invalid row. Alternatively the dtype for the output of the transformer can be switched to float for consistency. +# The second output is no longer integers, but floats. As most sklearn models cast input arrays to float32 internally, this difference is likely benign, but that's not guaranteed! Thus if you want to use pandas output for your production models, do check that the final outputs are the same for the valid rows, with and without a single invalid row. Alternatively the dtype for the output of the transformer can be switched to float for consistency if its supported by the transformer. # %% -mfp_float = MorganFingerprintTransformer(radius=2, nBits=25, safe_inference_mode=True, dtype=np.float32) +mfp_float = MorganFingerprintTransformer( + radius=2, fpSize=25, safe_inference_mode=True, dtype=np.float32 +) mfp_float.set_output(transform="pandas") fps = mfp_float.transform(mols) fps