Skip to content

Commit

Permalink
Added fingerprints to GeneralizedSubstruct search and extended SWIG wrappers (rdkit#6991)
Browse files Browse the repository at this point in the history

* Added fingerprints to GeneralizedSubstruct search

* Small change to fire tests

* Edits from code review

* Updated swig - newobject not required for GeneralizedSubstruct fingerprint methods

* GeneralizedSubstruct only supports unique_ptr in SWIG4.1
  • Loading branch information
jones-gareth authored Dec 19, 2023
1 parent b6361ae commit 31de6b0
Show file tree
Hide file tree
Showing 8 changed files with 196 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Code/GraphMol/GeneralizedSubstruct/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

rdkit_library(GeneralizedSubstruct
XQMol.cpp TextIO.cpp
LINK_LIBRARIES MolEnumerator TautomerQuery SubstructMatch SmilesParse GraphMol)
LINK_LIBRARIES MolEnumerator TautomerQuery SubstructMatch SmilesParse GraphMol Fingerprints)
target_compile_definitions(GeneralizedSubstruct PRIVATE RDKIT_GENERALIZEDSUBSTRUCT_BUILD)

rdkit_headers(XQMol.h DEST GraphMol/GeneralizedSubstruct)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,10 @@ BOOST_PYTHON_MODULE(rdGeneralizedSubstruct) {
.def("InitFromJSON", &ExtendedQueryMol::initFromJSON,
python::args("self", "text"))
.def("ToBinary", XQMolToBinary, python::args("self"))
.def("ToJSON", &ExtendedQueryMol::toJSON, python::args("self"));
.def("ToJSON", &ExtendedQueryMol::toJSON, python::args("self"))
.def("PatternFingerprintQuery",
&ExtendedQueryMol::patternFingerprintQuery,
(python::arg("self"), python::arg("fingerprintSize") = 2048));

python::def(
"MolHasSubstructMatch", &hasSubstructHelper,
Expand All @@ -129,6 +132,11 @@ BOOST_PYTHON_MODULE(rdGeneralizedSubstruct) {
python::arg("params") = python::object()),
"returns all matches (if any) of a molecule to a generalized substructure query");

python::def(
"PatternFingerprintTarget", &patternFingerprintTargetMol,
(python::arg("target"), python::arg("fingerprintSize") = 2048),
"Creates a pattern fingerprint for a target molecule that is compatible with an extended query");

python::def("CreateExtendedQueryMol", createExtendedQueryMolHelper,
(python::arg("mol"), python::arg("doEnumeration") = true,
python::arg("doTautomers") = true,
Expand Down
45 changes: 45 additions & 0 deletions Code/GraphMol/GeneralizedSubstruct/XQMol.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

#include "XQMol.h"

#include "GraphMol/Fingerprints/Fingerprints.h"

namespace RDKit {
namespace GeneralizedSubstruct {

Expand Down Expand Up @@ -53,6 +55,42 @@ void ExtendedQueryMol::initFromOther(const ExtendedQueryMol &other) {
}
}

std::unique_ptr<ExplicitBitVect> ExtendedQueryMol::patternFingerprintQuery(
unsigned fpSize) const {
if (std::holds_alternative<RWMol_T>(xqmol)) {
const auto raw = PatternFingerprintMol(*std::get<RWMol_T>(xqmol), fpSize, nullptr,
nullptr, true);
std::unique_ptr<ExplicitBitVect> ptr(raw);
return ptr;
} if (std::holds_alternative<MolBundle_T>(xqmol)) {
const auto raw = PatternFingerprintMol(*std::get<MolBundle_T>(xqmol), fpSize, nullptr,
true);
std::unique_ptr<ExplicitBitVect> ptr(raw);
return ptr;
} if (std::holds_alternative<TautomerQuery_T>(xqmol)) {
const auto raw = std::get<TautomerQuery_T>(xqmol)->patternFingerprintTemplate(fpSize);
std::unique_ptr<ExplicitBitVect> ptr(raw);
return ptr;
} if (std::holds_alternative<TautomerBundle_T>(xqmol)) {
const auto &tautomerBundle = std::get<TautomerBundle_T>(xqmol);
ExplicitBitVect *res = nullptr;
for (const auto &tautomer : *tautomerBundle) {
const auto molfp = tautomer->patternFingerprintTemplate(fpSize);
if (!res) {
res = molfp;
} else {
*res &= *molfp;
delete molfp;
}
}
std::unique_ptr<ExplicitBitVect> ptr(res);
return ptr;
}

throw std::invalid_argument("Unknown extended query molecule type");
}


std::vector<MatchVectType> SubstructMatch(
const ROMol &mol, const ExtendedQueryMol &query,
const SubstructMatchParameters &params) {
Expand Down Expand Up @@ -152,5 +190,12 @@ ExtendedQueryMol createExtendedQueryMol(const RWMol &mol, bool doEnumeration,
}
}
}

//! Computes the pattern fingerprint of a target molecule and hands ownership
//! of the result to the caller.
/*!
  \param mol     the target molecule
  \param fpSize  size passed through to PatternFingerprintMol

  \return an owning pointer wrapping the bit vector returned by
  PatternFingerprintMol
*/
std::unique_ptr<ExplicitBitVect> patternFingerprintTargetMol(
    const ROMol &mol, unsigned fpSize) {
  return std::unique_ptr<ExplicitBitVect>(
      PatternFingerprintMol(mol, fpSize, nullptr, nullptr, true));
}
} // namespace GeneralizedSubstruct
} // namespace RDKit
8 changes: 8 additions & 0 deletions Code/GraphMol/GeneralizedSubstruct/XQMol.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,10 @@ struct RDKIT_GENERALIZEDSUBSTRUCT_EXPORT ExtendedQueryMol {
ContainedType xqmol;
std::string toBinary() const;
std::string toJSON() const;

//! Query fingerprint
/*!
  Generates the pattern fingerprint for this extended query.

  \param fpSize  size of the fingerprint (defaults to 2048)

  \return the fingerprint; ownership passes to the caller
*/
std::unique_ptr<ExplicitBitVect> patternFingerprintQuery(
unsigned int fpSize = 2048U) const;
};

//! Creates an ExtendedQueryMol from the input molecule
Expand Down Expand Up @@ -102,6 +106,10 @@ RDKIT_GENERALIZEDSUBSTRUCT_EXPORT std::vector<MatchVectType> SubstructMatch(
const ROMol &mol, const ExtendedQueryMol &query,
const SubstructMatchParameters &params = SubstructMatchParameters());

//! Fingerprints a target molecule
/*!
  \param mol     the target molecule to fingerprint
  \param fpSize  size of the fingerprint (defaults to 2048)

  \return the fingerprint; ownership passes to the caller
*/
RDKIT_GENERALIZEDSUBSTRUCT_EXPORT std::unique_ptr<ExplicitBitVect>
patternFingerprintTargetMol(const ROMol &mol, unsigned int fpSize = 2048U);

//! checks if a molecule has a match to an ExtendedQueryMol
inline bool hasSubstructMatch(
const ROMol &mol, const ExtendedQueryMol &query,
Expand Down
59 changes: 59 additions & 0 deletions Code/GraphMol/GeneralizedSubstruct/catch_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,15 @@
using namespace RDKit;
using namespace RDKit::GeneralizedSubstruct;

// Test helper: fingerprints the extended query and the target (both with the
// default size), checks that neither fingerprint is empty, and returns the
// result of AllProbeBitsMatch with the query fingerprint as the probe.
bool fingerprintsMatch(const ROMol &target, const ExtendedQueryMol &xqm) {
  const auto queryFp = xqm.patternFingerprintQuery();
  const auto targetFp = patternFingerprintTargetMol(target);
  // an all-zero fingerprint would make the screen trivially pass or fail
  CHECK(queryFp->getNumOnBits() > 0);
  CHECK(targetFp->getNumOnBits() > 0);
  return AllProbeBitsMatch(*queryFp, *targetFp);
}

TEST_CASE("molecule basics") {
auto mol = "Cc1n[nH]c(F)c1"_smarts;
REQUIRE(mol);
Expand All @@ -39,6 +48,7 @@ TEST_CASE("molecule basics") {
CHECK(SubstructMatch(*"CCc1[nH]nc(F)c1"_smiles, *xq).empty());
CHECK(hasSubstructMatch(*"CCc1n[nH]c(F)c1"_smiles, *xq));
CHECK(!hasSubstructMatch(*"CCc1[nH]nc(F)c1"_smiles, *xq));
CHECK(fingerprintsMatch(*"CCc1n[nH]c(F)c1"_smiles, *xq));
}
}
}
Expand All @@ -57,6 +67,9 @@ TEST_CASE("enumeration basics") {
CHECK(SubstructMatch(*"COOCC"_smiles, *xq).size() == 1);
CHECK(SubstructMatch(*"COOOCC"_smiles, *xq).size() == 1);
CHECK(SubstructMatch(*"COOOOCC"_smiles, *xq).empty());
CHECK(fingerprintsMatch(*"COCC"_smiles, *xq));
CHECK(fingerprintsMatch(*"COOCC"_smiles, *xq));
CHECK(fingerprintsMatch(*"COOOCC"_smiles, *xq));
}
}
}
Expand All @@ -76,6 +89,9 @@ TEST_CASE("result counts") {
CHECK(SubstructMatch(*"COOCC"_smiles, *xq, ps).size() == 2);
CHECK(SubstructMatch(*"COOOCC"_smiles, *xq, ps).size() == 2);
CHECK(SubstructMatch(*"COOOOCC"_smiles, *xq, ps).empty());
CHECK(fingerprintsMatch(*"COCC"_smiles, *xq));
CHECK(fingerprintsMatch(*"COOCC"_smiles, *xq));
CHECK(fingerprintsMatch(*"COOOCC"_smiles, *xq));
}
}
}
Expand All @@ -93,6 +109,9 @@ TEST_CASE("tautomer basics") {
CHECK(SubstructMatch(*"CCc1n[nH]c(F)c1"_smiles, *xq).size() == 1);
CHECK(SubstructMatch(*"CCc1[nH]nc(F)c1"_smiles, *xq).size() == 1);
CHECK(SubstructMatch(*"CCc1[nH]ncc1"_smiles, *xq).empty());
CHECK(fingerprintsMatch(*"CCc1n[nH]c(F)c1"_smiles, *xq));
CHECK(fingerprintsMatch(*"CCc1[nH]nc(F)c1"_smiles, *xq));
CHECK(!fingerprintsMatch(*"CCc1[nH]ncc1"_smiles, *xq));
}
}
}
Expand All @@ -119,6 +138,11 @@ TEST_CASE("tautomer bundle basics") {
CHECK(SubstructMatch(*"CCc1[nH]ncc1F"_smiles, *xq).size() == 1);
CHECK(SubstructMatch(*"CCc1n[nH]cc1F"_smiles, *xq).size() == 1);
CHECK(SubstructMatch(*"CCc1[nH]ncc1"_smiles, *xq).empty());
CHECK(fingerprintsMatch(*"CCc1n[nH]c(F)c1"_smiles, *xq));
CHECK(fingerprintsMatch(*"CCc1[nH]nc(F)c1"_smiles, *xq));
CHECK(fingerprintsMatch(*"CCc1[nH]ncc1F"_smiles, *xq));
CHECK(fingerprintsMatch(*"CCc1n[nH]cc1F"_smiles, *xq));
CHECK(!fingerprintsMatch(*"CCc1[nH]ncc1"_smiles, *xq));
}
}
}
Expand All @@ -136,6 +160,8 @@ TEST_CASE("createExtendedQueryMol and copy ctors") {
CHECK(std::holds_alternative<ExtendedQueryMol::RWMol_T>(xqm.xqmol));
CHECK(SubstructMatch(*"COCC"_smiles, xqm).size() == 1);
CHECK(SubstructMatch(*"COOCC"_smiles, xqm).empty());
CHECK(fingerprintsMatch(*"COCC"_smiles, xqm));
CHECK(!fingerprintsMatch(*"COOCC"_smiles, xqm));
}
}
SECTION("MolBundle") {
Expand All @@ -152,6 +178,9 @@ TEST_CASE("createExtendedQueryMol and copy ctors") {
CHECK(SubstructMatch(*"COOCC"_smiles, xqm).size() == 1);
CHECK(SubstructMatch(*"COOOCC"_smiles, xqm).size() == 1);
CHECK(SubstructMatch(*"COOOOCC"_smiles, xqm).empty());
CHECK(fingerprintsMatch(*"COCC"_smiles, xqm));
CHECK(fingerprintsMatch(*"COOCC"_smiles, xqm));
CHECK(fingerprintsMatch(*"COOOCC"_smiles, xqm));
}
}
SECTION("TautomerQuery") {
Expand All @@ -169,6 +198,9 @@ TEST_CASE("createExtendedQueryMol and copy ctors") {
CHECK(SubstructMatch(*"CCC1OC(=N)N1"_smiles, *mol1).empty());
CHECK(SubstructMatch(*"CCC1OC(=N)N1"_smiles, xqm).size() == 1);
CHECK(SubstructMatch(*"c1[nH]ncc1"_smiles, xqm).empty());
CHECK(fingerprintsMatch(*"CCC1OC(N)=N1"_smiles, xqm));
CHECK(fingerprintsMatch(*"CCC1OC(=N)N1"_smiles, xqm));
CHECK(!fingerprintsMatch(*"c1[nH]ncc1"_smiles, xqm));
}
}
SECTION("TautomerBundle") {
Expand All @@ -186,6 +218,9 @@ TEST_CASE("createExtendedQueryMol and copy ctors") {
CHECK(SubstructMatch(*"COOCC1(F)OC(=N)N1"_smiles, xqm).size() == 1);
CHECK(SubstructMatch(*"COCC1OC(N)=N1"_smiles, xqm).size() == 1);
CHECK(SubstructMatch(*"COOOOCC1OC(=N)N1"_smiles, xqm).empty());
CHECK(fingerprintsMatch(*"COCC1(F)OC(N)=N1"_smiles, xqm));
CHECK(fingerprintsMatch(*"COOCC1(F)OC(=N)N1"_smiles, xqm));
CHECK(fingerprintsMatch(*"COCC1OC(N)=N1"_smiles, xqm));
}
}
}
Expand All @@ -200,6 +235,8 @@ TEST_CASE("test SRUs") {
// we won't test limits here.
CHECK(SubstructMatch(*"FCN(C)CC"_smiles, xqm).size() == 1);
CHECK(SubstructMatch(*"FCN(O)N(C)CC"_smiles, xqm).size() == 1);
CHECK(fingerprintsMatch(*"FCN(C)CC"_smiles, xqm));
CHECK(fingerprintsMatch(*"FCN(O)N(C)CC"_smiles, xqm));
}
}

Expand Down Expand Up @@ -232,6 +269,14 @@ TEST_CASE("adjustQueryProperties") {
CHECK(SubstructMatch(*"COC1OC1"_smiles, xqm1).empty());
CHECK(SubstructMatch(*"COC1OC1"_smiles, xqm2).empty());
CHECK(SubstructMatch(*"COC1OC1"_smiles, xqm3).size() == 1);
CHECK(fingerprintsMatch(*"COC1CC1"_smiles, xqm1));
CHECK(fingerprintsMatch(*"COC1CC1"_smiles, xqm2));
CHECK(fingerprintsMatch(*"COC1CC1"_smiles, xqm3));
CHECK(fingerprintsMatch(*"COC1C(C)C1"_smiles, xqm1));
CHECK(fingerprintsMatch(*"COC1C(C)C1"_smiles, xqm3));
CHECK(!fingerprintsMatch(*"COC1OC1"_smiles, xqm1));
CHECK(!fingerprintsMatch(*"COC1OC1"_smiles, xqm2));
CHECK(fingerprintsMatch(*"COC1OC1"_smiles, xqm3));
}
SECTION("MolBundle") {
auto mol = "COCC |LN:1:1.3|"_smiles;
Expand All @@ -248,6 +293,10 @@ TEST_CASE("adjustQueryProperties") {
CHECK(SubstructMatch(*"COOC=C"_smiles, xqm1).empty());
CHECK(SubstructMatch(*"COC=C"_smiles, xqm2).size() == 1);
CHECK(SubstructMatch(*"COOC=C"_smiles, xqm2).size() == 1);
CHECK(!fingerprintsMatch(*"COC=C"_smiles, xqm1));
CHECK(!fingerprintsMatch(*"COOC=C"_smiles, xqm1));
CHECK(fingerprintsMatch(*"COC=C"_smiles, xqm2));
CHECK(fingerprintsMatch(*"COOC=C"_smiles, xqm2));
}
SECTION("TautomerQuery") {
auto mol1 = "CC1OC(N)=N1"_smiles;
Expand All @@ -266,6 +315,10 @@ TEST_CASE("adjustQueryProperties") {
CHECK(SubstructMatch(*"CC1(F)OC(=N)N1"_smiles, xqm1).size() == 1);
CHECK(SubstructMatch(*"CC1(F)OC(N)=N1"_smiles, xqm2).empty());
CHECK(SubstructMatch(*"CC1(F)OC(=N)N1"_smiles, xqm2).empty());
CHECK(fingerprintsMatch(*"CC1OC(N)=N1"_smiles, xqm1));
CHECK(fingerprintsMatch(*"CC1OC(N)=N1"_smiles, xqm2));
CHECK(fingerprintsMatch(*"CC1(F)OC(N)=N1"_smiles, xqm1));
CHECK(fingerprintsMatch(*"CC1(F)OC(=N)N1"_smiles, xqm1));
}
SECTION("TautomerBundle") {
auto mol1 = "COCC1OC(N)=N1 |LN:1:1.3|"_smiles;
Expand All @@ -283,6 +336,12 @@ TEST_CASE("adjustQueryProperties") {
CHECK(SubstructMatch(*"COOCC1(F)OC(=N)N1"_smiles, xqm1).size() == 1);
CHECK(SubstructMatch(*"COCC1(F)OC(N)=N1"_smiles, xqm2).empty());
CHECK(SubstructMatch(*"COOCC1(F)OC(=N)N1"_smiles, xqm2).empty());
CHECK(fingerprintsMatch(*"COCC1OC(N)=N1"_smiles, xqm1));
CHECK(fingerprintsMatch(*"COOCC1OC(=N)N1"_smiles, xqm1));
CHECK(fingerprintsMatch(*"COCC1OC(N)=N1"_smiles, xqm2));
CHECK(fingerprintsMatch(*"COOCC1OC(=N)N1"_smiles, xqm2));
CHECK(fingerprintsMatch(*"COCC1(F)OC(N)=N1"_smiles, xqm1));
CHECK(fingerprintsMatch(*"COOCC1(F)OC(=N)N1"_smiles, xqm1));
}
}

Expand Down
17 changes: 17 additions & 0 deletions Code/JavaWrappers/GeneralizedSubstruct.i
Original file line number Diff line number Diff line change
@@ -1,10 +1,21 @@


#if SWIG_VERSION >= 0x040101
%include <std_unique_ptr.i>
%unique_ptr(ExplicitBitVect)
#endif

%{
#include <GraphMol/GeneralizedSubstruct/XQMol.h>
%}
// %include "std_unique_ptr.i"
// %unique_ptr(ExtendedQueryMol)

#if SWIG_VERSION < 0x040101
%ignore patternFingerprintTargetMol(const ROMol &mol, unsigned int fpSize = 2048U);
%ignore patternFingerprintQuery(unsigned int fpSize = 2048U) const;
#endif

%ignore ExtendedQueryMol(std::unique_ptr<RWMol> mol);
%ignore ExtendedQueryMol(std::unique_ptr<MolBundle> mol);
%ignore ExtendedQueryMol(std::unique_ptr<TautomerQuery> mol);
Expand All @@ -14,3 +25,9 @@

%include "GraphMol/GeneralizedSubstruct/XQMol.h";

// Extends the wrapped ExtendedQueryMol with a getSubstructMatches() method so
// that substructure matches can be requested directly on the query object.
%extend RDKit::GeneralizedSubstruct::ExtendedQueryMol {
// Returns the result of SubstructMatch(target, *this, ps): one vector of
// (int, int) pairs per match. When ps is omitted a default-constructed
// SubstructMatchParameters is used.
std::vector< std::vector<std::pair<int, int> > > getSubstructMatches(RDKit::ROMol &target,RDKit::SubstructMatchParameters ps = RDKit::SubstructMatchParameters()){
std::vector<RDKit::MatchVectType> mvs = SubstructMatch(target, *($self),ps);
return mvs;
};
}
1 change: 1 addition & 0 deletions Code/JavaWrappers/csharp_wrapper/GraphMolCSharp.i
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ typedef unsigned long long int uintmax_t;
%include "../MolHash.i"
%include "../Abbreviations.i"
%include "../Streams.i"
%include "../GeneralizedSubstruct.i"


// Create a class to throw various sorts of errors for testing. Required for unit tests in ErrorHandlingTests.java
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
using GraphMolWrap;
using Xunit;

namespace RdkitTests
{
    // Tests for the generalized substructure search wrappers: substructure
    // match counts and pattern-fingerprint screening for extended queries.
    public class GeneralizedSubstructTest
    {
        // Fingerprints the query and the target, asserts that both fingerprints
        // have at least one bit set, and returns the AllProbeBitsMatch result
        // with the query fingerprint as the probe.
        private bool FingerprintsMatch(ExtendedQueryMol queryMol, RWMol target)
        {
            var queryBits = queryMol.patternFingerprintQuery();
            var targetBits = RDKFuncs.patternFingerprintTargetMol(target);
            Assert.True(queryBits.getNumOnBits() > 0);
            Assert.True(targetBits.getNumOnBits() > 0);
            return RDKFuncs.AllProbeBitsMatch(queryBits, targetBits);
        }

        [Fact]
        public void TestControlSteps()
        {
            var source = RWMol.MolFromSmiles("COCC1OC(N)=N1 |LN:1:1.3|");

            // The optional arguments after the molecule are, in order,
            // doEnumeration and doTautomers.
            var enumAndTautomers = RDKFuncs.createExtendedQueryMol(source);
            var tautomersOnly = RDKFuncs.createExtendedQueryMol(source, false);
            var enumOnly = RDKFuncs.createExtendedQueryMol(source, true, false);
            var neitherStep = RDKFuncs.createExtendedQueryMol(source, false, false);

            // Target identical to the query drawing: every variant matches.
            var sameForm = RWMol.MolFromSmiles("COCC1OC(N)=N1");
            Assert.Equal(1, enumAndTautomers.getSubstructMatches(sameForm).Count);
            Assert.Equal(1, tautomersOnly.getSubstructMatches(sameForm).Count);
            Assert.Equal(1, enumOnly.getSubstructMatches(sameForm).Count);
            Assert.Equal(1, neitherStep.getSubstructMatches(sameForm).Count);
            Assert.True(FingerprintsMatch(enumAndTautomers, sameForm));
            Assert.True(FingerprintsMatch(tautomersOnly, sameForm));
            Assert.True(FingerprintsMatch(enumOnly, sameForm));
            Assert.True(FingerprintsMatch(neitherStep, sameForm));

            // Alternative tautomer: only the tautomer-aware variants match.
            var otherTautomer = RWMol.MolFromSmiles("COCC1OC(=N)N1");
            Assert.Equal(1, enumAndTautomers.getSubstructMatches(otherTautomer).Count);
            Assert.Equal(1, tautomersOnly.getSubstructMatches(otherTautomer).Count);
            Assert.Equal(0, enumOnly.getSubstructMatches(otherTautomer).Count);
            Assert.Equal(0, neitherStep.getSubstructMatches(otherTautomer).Count);
            Assert.True(FingerprintsMatch(enumAndTautomers, otherTautomer));
            Assert.True(FingerprintsMatch(tautomersOnly, otherTautomer));
            Assert.False(FingerprintsMatch(enumOnly, otherTautomer));
            Assert.False(FingerprintsMatch(neitherStep, otherTautomer));

            // Extra oxygen in the chain: only the enumerating variants match.
            var longerChain = RWMol.MolFromSmiles("COOCC1OC(N)=N1");
            Assert.Equal(1, enumAndTautomers.getSubstructMatches(longerChain).Count);
            Assert.Equal(0, tautomersOnly.getSubstructMatches(longerChain).Count);
            Assert.Equal(1, enumOnly.getSubstructMatches(longerChain).Count);
            Assert.Equal(0, neitherStep.getSubstructMatches(longerChain).Count);
            Assert.True(FingerprintsMatch(enumAndTautomers, longerChain));
            Assert.True(FingerprintsMatch(enumOnly, longerChain));
        }
    }
}

0 comments on commit 31de6b0

Please sign in to comment.