Skip to content

Commit

Permalink
Rascal bond label error (rdkit#8199)
Browse files Browse the repository at this point in the history
* Fix atom ordering in bond label.

* Response to review.

---------

Co-authored-by: David Cosgrove <[email protected]>
Co-authored-by: Greg Landrum <[email protected]>
  • Loading branch information
3 people authored Jan 29, 2025
1 parent af63479 commit 1fba446
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 16 deletions.
23 changes: 7 additions & 16 deletions Code/GraphMol/RascalMCES/RascalMCES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -265,23 +265,14 @@ void getBondLabels(const ROMol &mol, const RascalOptions &opts,
} else {
bondType = std::to_string(b->getBondType());
}
if (b->getBeginAtom()->getAtomicNum() == b->getEndAtom()->getAtomicNum()) {
if (b->getEndAtom()->getDegree() < b->getBeginAtom()->getDegree()) {
bondLabels[b->getIdx()] = atomLabels[b->getEndAtomIdx()] + bondType +
atomLabels[b->getBeginAtomIdx()];
} else {
bondLabels[b->getIdx()] = atomLabels[b->getBeginAtomIdx()] + bondType +
atomLabels[b->getEndAtomIdx()];
}
} else {
if (b->getBeginAtom()->getAtomicNum() < b->getEndAtom()->getAtomicNum()) {
bondLabels[b->getIdx()] = atomLabels[b->getBeginAtomIdx()] + bondType +
atomLabels[b->getEndAtomIdx()];
} else {
bondLabels[b->getIdx()] = atomLabels[b->getEndAtomIdx()] + bondType +
atomLabels[b->getBeginAtomIdx()];
}
// The atom labels need to be in consistent order irrespective
// of input order.
auto lbl1 = atomLabels[b->getBeginAtomIdx()];
auto lbl2 = atomLabels[b->getEndAtomIdx()];
if (lbl1 < lbl2) {
std::swap(lbl1, lbl2);
}
bondLabels[b->getIdx()] = lbl1 + bondType + lbl2;
}
}

Expand Down
40 changes: 40 additions & 0 deletions Code/GraphMol/RascalMCES/mces_catch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1379,6 +1379,10 @@ TEST_CASE("Equivalent bonds") {
}

TEST_CASE("Atom aromaticity match") {
// The first 2 tests give the results they do because the flag
// says to ignore _atom_ aromaticity, but it still takes into
// account _bond_ aromaticity. It's so that the C-N bond
// matches in both cases in the first test.
{
auto m1 = "c1ccccc1NCC"_smiles;
REQUIRE(m1);
Expand Down Expand Up @@ -1443,6 +1447,42 @@ TEST_CASE("Empty results bug") {
REQUIRE(!res.empty());
}

TEST_CASE("Order of atoms in bond labels must be consistent - Github 8198.") {
{
auto m1 = "c1ccccc1C"_smiles;
REQUIRE(m1);
auto m2 = "c1ccccc1C2CC2"_smiles;
REQUIRE(m2);

RascalOptions opts;
opts.similarityThreshold = 0.1;
opts.ignoreAtomAromaticity = false;
auto res = rascalMCES(*m1, *m2, opts);
REQUIRE(res.size() == 1);
CHECK(res.front().getAtomMatches().size() == 7);
CHECK(res.front().getBondMatches().size() == 7);
CHECK(res.front().getSmarts() == "c1:c:c:c:c:c:1-C");
check_smarts_ok(*m1, *m2, res.front());
}
{
auto m1 = "Cc1ccc(C2=NN(C(N)=S)C(c3ccc(F)cc3)C2)cc1C"_smiles;
REQUIRE(m1);
auto m2 = "CC(=O)N1N=C(c2ccc(C)c(C)c2)CC1c1ccc(F)cc1"_smiles;
REQUIRE(m2);

RascalOptions opts;
opts.similarityThreshold = 0.1;
opts.ignoreAtomAromaticity = false;
auto res = rascalMCES(*m1, *m2, opts);
REQUIRE(res.size() == 1);
CHECK(res.front().getAtomMatches().size() == 21);
CHECK(res.front().getBondMatches().size() == 23);
CHECK(res.front().getSmarts() ==
"Cc1:c:c:c(-C2=NN(-C)-C(-c3:c:c:c(-F):c:c:3)-C2):c:c:1-C");
check_smarts_ok(*m1, *m2, res.front());
}
}

TEST_CASE("Duplicate Single Largest Frag") {
{
auto m1 = "c1ccc2c(c1)c(ncn2)CNCc3ccc(cc3)Cl"_smiles;
Expand Down

0 comments on commit 1fba446

Please sign in to comment.