diff --git a/README.md b/README.md
index 8b80801..ba30f26 100644
--- a/README.md
+++ b/README.md
@@ -20,20 +20,29 @@ The first draft for the project was created at the [RDKIT UGM 2022 hackathon](ht
## Implemented
-* Transformer Classes
- * SmilesToMol
- * Desc2DTransformer
- * MACCSTransformer
- * RDKitFPTransformer
+* descriptors
+ * MolecularDescriptorTransformer
+
+* fingerprints
+ * MorganFingerprintTransformer
+ * MACCSKeysFingerprintTransformer
+ * RDKitFingerprintTransformer
* AtomPairFingerprintTransformer
* TopologicalTorsionFingerprintTransformer
- * MorganTransformer
+ * MHFingerprintTransformer
* SECFingerprintTransformer
+ * AvalonFingerprintTransformer
-
-* Utilities
+* conversions
+ * SmilesToMolTransformer
+
+* standardizer
+ * Standardizer
+
+* utilities
* CheckSmilesSanitazion
+
## Installation
Users can install latest tagged release from pip
diff --git a/scikit_mol/fingerprints.py b/scikit_mol/fingerprints.py
index 719e236..81bc43b 100644
--- a/scikit_mol/fingerprints.py
+++ b/scikit_mol/fingerprints.py
@@ -8,6 +8,7 @@
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import rdFingerprintGenerator
from rdkit.Chem import rdMHFPFingerprint
+from rdkit.Avalon import pyAvalonTools
import numpy as np
import pandas as pd
@@ -448,7 +449,45 @@ def _mol2fp(self, mol):
useChirality=bool(self.useChirality), useBondTypes=bool(self.useBondTypes)
)
+class AvalonFingerprintTransformer(FpsTransformer):
+ # Fingerprint from the Avalon toolkeit, https://doi.org/10.1021/ci050413p
+ def __init__(self, nBits:int = 512, isQuery:bool = False, resetVect:bool = False, bitFlags:int = 15761407, useCounts:bool = False, parallel: Union[bool, int] = False,):
+ """ Transform RDKit mols into Count or bit-based Avalon Fingerprints
+ Parameters
+ ----------
+ nBits : int, optional
+ Size of the fingerprint, by default 512
+ isQuery : bool, optional
+ use the fingerprint for a query structure, by default False
+ resetVect : bool, optional
+ reset vector, by default False NB: only used in GetAvalonFP (not for GetAvalonCountFP)
+ bitFlags : int, optional
+ Substructure fingerprint (32767) or similarity fingerprint (15761407) by default 15761407
+ useCounts : bool, optional
+ If toggled will create the count and not bit-based fingerprint, by default False
+ """
+ super().__init__(parallel = parallel)
+ self.nBits = nBits
+ self.isQuery = isQuery
+ self.resetVect = resetVect
+ self.bitFlags = bitFlags
+ self.useCounts = useCounts
+
+ def _mol2fp(self, mol):
+ if self.useCounts:
+ return pyAvalonTools.GetAvalonCountFP(mol,
+ nBits=int(self.nBits),
+ isQuery=bool(self.isQuery),
+ bitFlags=int(self.bitFlags)
+ )
+ else:
+ return pyAvalonTools.GetAvalonFP(mol,
+ nBits=int(self.nBits),
+ isQuery=bool(self.isQuery),
+ resetVect=bool(self.resetVect),
+ bitFlags=int(self.bitFlags)
+ )
def parallel_helper(args):
diff --git a/tests/test_fptransformers.py b/tests/test_fptransformers.py
index 1975e86..90000b5 100644
--- a/tests/test_fptransformers.py
+++ b/tests/test_fptransformers.py
@@ -7,7 +7,7 @@
from fixtures import mols_list, smiles_list, fingerprint, chiral_smiles_list, chiral_mols_list
from sklearn import clone
-from scikit_mol.fingerprints import MorganFingerprintTransformer, MACCSKeysFingerprintTransformer, RDKitFingerprintTransformer, AtomPairFingerprintTransformer, TopologicalTorsionFingerprintTransformer, SECFingerprintTransformer, MHFingerprintTransformer
+from scikit_mol.fingerprints import MorganFingerprintTransformer, MACCSKeysFingerprintTransformer, RDKitFingerprintTransformer, AtomPairFingerprintTransformer, TopologicalTorsionFingerprintTransformer, SECFingerprintTransformer, MHFingerprintTransformer, AvalonFingerprintTransformer
@@ -39,6 +39,9 @@ def secfp_transformer():
def mhfp_transformer():
return MHFingerprintTransformer()
+@pytest.fixture
+def avalon_transformer():
+ return AvalonFingerprintTransformer()
def test_fpstransformer_fp2array(morgan_transformer, fingerprint):
fp = morgan_transformer._fp2array(fingerprint)
@@ -54,8 +57,8 @@ def test_fpstransformer_transform_mol(morgan_transformer, mols_list):
assert(fp.shape == (2048,))
assert(fp.sum() == 14)
-def test_clonability(maccs_transformer, morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, secfp_transformer, mhfp_transformer):
- for t in [maccs_transformer, morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, secfp_transformer, mhfp_transformer]:
+def test_clonability(maccs_transformer, morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, secfp_transformer, mhfp_transformer, avalon_transformer):
+ for t in [maccs_transformer, morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, secfp_transformer, mhfp_transformer, avalon_transformer]:
params = t.get_params()
t2 = clone(t)
params_2 = t2.get_params()
@@ -64,8 +67,8 @@ def test_clonability(maccs_transformer, morgan_transformer, rdkit_transformer, a
#Cloned transformers should not be the same object
assert t2 != t
-def test_set_params(morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, secfp_transformer, mhfp_transformer):
- for t in [morgan_transformer, atompair_transformer, topologicaltorsion_transformer]:
+def test_set_params(morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, secfp_transformer, mhfp_transformer, avalon_transformer):
+ for t in [morgan_transformer, atompair_transformer, topologicaltorsion_transformer, avalon_transformer]:
params = t.get_params()
#change extracted dictionary
params['nBits'] = 4242
@@ -96,11 +99,11 @@ def test_set_params(morgan_transformer, rdkit_transformer, atompair_transformer,
params_2 = t.get_params()
assert all([ params[key] == params_2[key] for key in params.keys()])
-def test_transform(mols_list, morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, secfp_transformer, mhfp_transformer):
+def test_transform(mols_list, morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, secfp_transformer, mhfp_transformer, avalon_transformer):
#Test different types of input
for mols in [mols_list, np.array(mols_list), pd.Series(mols_list)]:
#Test the different transformers
- for t in [morgan_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, rdkit_transformer, secfp_transformer, mhfp_transformer]:
+ for t in [morgan_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, rdkit_transformer, secfp_transformer, mhfp_transformer, avalon_transformer]:
params = t.get_params()
fps = t.transform(mols)
#Assert that the same length of input and output
@@ -116,11 +119,11 @@ def test_transform(mols_list, morgan_transformer, rdkit_transformer, atompair_tr
assert len(fps[0]) == fpsize
-def test_transform_parallel(mols_list, morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, secfp_transformer, mhfp_transformer):
+def test_transform_parallel(mols_list, morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, secfp_transformer, mhfp_transformer, avalon_transformer):
#Test different types of input
for mols in [mols_list, np.array(mols_list), pd.Series(mols_list)]:
#Test the different transformers
- for t in [morgan_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, rdkit_transformer, secfp_transformer, mhfp_transformer]:
+ for t in [morgan_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, rdkit_transformer, secfp_transformer, mhfp_transformer, avalon_transformer]:
t.set_params(parallel=True)
params = t.get_params()
fps = t.transform(mols)
@@ -138,9 +141,9 @@ def test_transform_parallel(mols_list, morgan_transformer, rdkit_transformer, at
assert len(fps[0]) == fpsize
-def test_picklable(morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, secfp_transformer):
+def test_picklable(morgan_transformer, rdkit_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, secfp_transformer, avalon_transformer):
#Test the different transformers
- for t in [morgan_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, rdkit_transformer, secfp_transformer]:
+ for t in [morgan_transformer, atompair_transformer, topologicaltorsion_transformer, maccs_transformer, rdkit_transformer, secfp_transformer, avalon_transformer]:
with tempfile.NamedTemporaryFile() as f:
pickle.dump(t, f)
f.seek(0)
@@ -248,4 +251,10 @@ def test_MHFingerprintTransformer(chiral_mols_list):
}
assert_transformer_set_params(MHFingerprintTransformer, new_params, chiral_mols_list)
-
+def test_AvalonFingerprintTransformer(chiral_mols_list):
+ new_params = {'nBits': 1024,
+ 'isQuery': True,
+ # 'resetVect': True, #TODO: this doesn't change the FP
+ 'bitFlags': 32767
+ }
+ assert_transformer_set_params(AvalonFingerprintTransformer, new_params, chiral_mols_list)
diff --git a/tests/test_parameter_types.py b/tests/test_parameter_types.py
index 6867dbb..f175c87 100644
--- a/tests/test_parameter_types.py
+++ b/tests/test_parameter_types.py
@@ -2,11 +2,11 @@
import numpy as np
from rdkit import Chem
from fixtures import mols_list, smiles_list
-from test_fptransformers import morgan_transformer, atompair_transformer, topologicaltorsion_transformer, rdkit_transformer
+from test_fptransformers import morgan_transformer, atompair_transformer, topologicaltorsion_transformer, rdkit_transformer, avalon_transformer
-def test_Transformer_exotic_types(mols_list, morgan_transformer,atompair_transformer, topologicaltorsion_transformer):
- for transformer in [morgan_transformer, atompair_transformer, topologicaltorsion_transformer]:
+def test_Transformer_exotic_types(mols_list, morgan_transformer,atompair_transformer, topologicaltorsion_transformer, avalon_transformer):
+ for transformer in [morgan_transformer, atompair_transformer, topologicaltorsion_transformer, avalon_transformer]:
params = transformer.get_params()
for useCounts in [np.bool_(True), np.bool_(False)]:
diff --git a/tests/test_transformers.py b/tests/test_transformers.py
index 17e0a94..a45acb1 100644
--- a/tests/test_transformers.py
+++ b/tests/test_transformers.py
@@ -13,7 +13,7 @@
from scikit_mol.conversions import SmilesToMolTransformer
from scikit_mol.fingerprints import MACCSKeysFingerprintTransformer, RDKitFingerprintTransformer, AtomPairFingerprintTransformer, \
TopologicalTorsionFingerprintTransformer, MorganFingerprintTransformer, SECFingerprintTransformer, \
- MHFingerprintTransformer
+ MHFingerprintTransformer, AvalonFingerprintTransformer
from fixtures import SLC6A4_subset
@@ -34,7 +34,8 @@ def test_transformer(SLC6A4_subset):
"MorganTransformer": [MorganFingerprintTransformer, False],
"MorganTransformer useCounts": [MorganFingerprintTransformer, True],
"SECFingerprintTransformer": [SECFingerprintTransformer, None],
- "MHFingerprintTransformer": [MHFingerprintTransformer, None]}
+ "MHFingerprintTransformer": [MHFingerprintTransformer, None],
+ 'AvalonFingerprintTransformer': [AvalonFingerprintTransformer, None]}
# fit on toy data and print train/test score if successful or collect the failed FP
failed_FP = []