From b1db53ceba941a2f0d8be648d21e475c58f420ea Mon Sep 17 00:00:00 2001 From: Christopher Woods Date: Sat, 22 Jun 2024 18:30:02 +0100 Subject: [PATCH 1/2] Now reorder cutgroups so that the CGAtomIdx order always matches the AtomIdx order. This works really well, except that it fails when merging submolecules, because the CGAtomIdx order changes on commit. I need to fix this. I suspect it will be by converting merged proteins into a single cutgroup? --- corelib/src/libs/SireMol/structureeditor.cpp | 128 +++++++++++++++++++ tests/mol/test_reorder.py | 36 ++++++ 2 files changed, 164 insertions(+) create mode 100644 tests/mol/test_reorder.py diff --git a/corelib/src/libs/SireMol/structureeditor.cpp b/corelib/src/libs/SireMol/structureeditor.cpp index df276c401..0452b4e8e 100644 --- a/corelib/src/libs/SireMol/structureeditor.cpp +++ b/corelib/src/libs/SireMol/structureeditor.cpp @@ -3210,6 +3210,134 @@ const MoleculeInfoData &StructureEditor::commitInfo() } } + // make sure that the AtomIdx order matches the CGAtomIdx order + // This is needed to remove confusion in code that assumes + // AtomIdx order is always correct. This is a big change in code + // behaviour, but it is necessary to make the code more robust, + // and also recognises the reality that CutGroups are now "under + // the hood" in the code and not really visible or known about by + // end users. + int atom_count = 0; + bool in_atomidx_order = true; + + for (int i = 0; i < this->nCutGroupsInMolecule(); ++i) + { + const auto cgdata = this->getCGData(CGIdx(i)); + + for (const auto &atomidx : cgdata.get<1>()) + { + if (atomidx != AtomIdx(atom_count)) + { + in_atomidx_order = false; + break; + } + + ++atom_count; + } + } + + if (atom_count != this->nAtomsInMolecule()) + { + in_atomidx_order = false; + } + + if (not in_atomidx_order) + { + SireBase::Console::warning(QObject::tr( + "The atoms in the CutGroups are not in the same order as the atoms in the molecule. " + "Rebuilding CutGroups so that the CGAtomIdx order matches the AtomIdx order.")); + + // are the atoms contiguous in residues? + bool contiguous_residues = true; + QSet seen_residues; + seen_residues.reserve(this->nResiduesInMolecule()); + ResIdx prev_residx = ResIdx::null(); + + for (int i = 0; i < this->nAtomsInMolecule(); ++i) + { + const auto info = this->getAtomData(AtomIdx(i)); + + if (prev_residx.isNull()) + { + prev_residx = info.get<4>(); + seen_residues.insert(prev_residx); + } + else + { + if (info.get<4>() != prev_residx) + { + prev_residx = info.get<4>(); + + if (seen_residues.contains(prev_residx)) + { + contiguous_residues = false; + break; + } + else + { + seen_residues.insert(prev_residx); + } + } + } + } + + // delete all existing CutGroups + this->removeAllCutGroups(); + + if (contiguous_residues) + { + prev_residx = ResIdx::null(); + int cgcount = 0; + + for (int i = 0; i < this->nAtomsInMolecule(); ++i) + { + const auto info = this->getAtomData(AtomIdx(i)); + + if (prev_residx.isNull() or info.get<4>() != prev_residx) + { + prev_residx = info.get<4>(); + this->addCutGroup().rename(CGName(QString::number(cgcount))); + cgcount += 1; + } + + this->reparentAtom(this->getUID(AtomIdx(i)), CGIdx(cgcount - 1)); + } + + if (cgcount != this->nResiduesInMolecule()) + { + SireBase::Console::warning(QObject::tr( + "The number of CutGroups created (%1) does not match " + "the number of residues in the molecule (%2)! Rebuilding " + "into a single CutGroup.") + .arg(cgcount) + .arg(this->nResiduesInMolecule())); + + this->removeAllCutGroups(); + + this->addCutGroup().rename(CGName("0")); + + for (int i = 0; i < this->nAtomsInMolecule(); ++i) + { + this->reparentAtom(this->getUID(AtomIdx(i)), CGIdx(0)); + } + } + } + else + { + // create a new CutGroup and add all atoms to it + SireBase::Console::warning(QObject::tr( + "The atoms in the molecule are not contiguous in residues. " + "Rebuilding into a single CutGroup.")); + + this->addCutGroup().rename(CGName("0")); + + for (int i = 0; i < this->nAtomsInMolecule(); ++i) + { + this->reparentAtom(this->getUID(AtomIdx(i)), CGIdx(0)); + } + } + } + d->cached_molinfo = new MoleculeInfoData(*this); } diff --git a/tests/mol/test_reorder.py b/tests/mol/test_reorder.py new file mode 100644 index 000000000..e04325da8 --- /dev/null +++ b/tests/mol/test_reorder.py @@ -0,0 +1,36 @@ +def test_reorder_atoms(ala_mols): + mols = ala_mols + + mol = mols[0] + + # check that reorder preserves residue cutting + mol = mol.edit() + atom = mol.atom(2) + atom = atom.reindex(0) + mol = atom.molecule().commit() + + assert mol.num_residues() == 3 + assert mol.num_cutgroups() == mol.num_residues() + + atomidx = 0 + + for cutgroup in mol.cutgroups(): + for atom in cutgroup.atoms(): + assert atom.index().value() == atomidx + atomidx += 1 + + # now check with reordering that break residue cutting + mol = mols[0] + + mol = mol.edit() + atom = mol.atom("HA") + atom = atom.reindex(0) + mol = atom.molecule().commit() + + assert mol.num_cutgroups() == 1 + + atomidx = 0 + + for atom in mol.cutgroups()[0].atoms(): + assert atom.index().value() == atomidx + atomidx += 1 From 26a79eeca9ed143b73e9f1a595fc58a8065db942 Mon Sep 17 00:00:00 2001 From: Christopher Woods Date: Sat, 22 Jun 2024 22:55:08 +0100 Subject: [PATCH 2/2] Fixed the merge code by making sure that the AtomIdx of the added atom is correct for its position in the molecule (i.e. is one higher than the AtomIdx of the last atom in the residue) --- corelib/src/libs/SireSystem/merge.cpp | 14 ++++++++++++++ doc/source/changelog.rst | 13 +++++++++++++ 2 files changed, 27 insertions(+) diff --git a/corelib/src/libs/SireSystem/merge.cpp b/corelib/src/libs/SireSystem/merge.cpp index 5031f81d8..d01703f76 100644 --- a/corelib/src/libs/SireSystem/merge.cpp +++ b/corelib/src/libs/SireSystem/merge.cpp @@ -390,12 +390,26 @@ namespace SireSystem auto res = mol.residue(residx); + // get the AtomIdx of the last atom in this residue + AtomIdx last_atomidx(0); + + if (res.nAtoms() > 0) + { + last_atomidx = res.atom(res.nAtoms() - 1).index(); + } + // add the atom - it has the name "Xxx" as it doesn't exist // in the reference state auto atom = res.add(AtomName("Xxx")); largest_atomnum = AtomNum(largest_atomnum.value() + 1); atom.renumber(largest_atomnum); + // ensure that its index follows on from the index of the + // last atom in the residue - this is so that we keep + // the AtomIdx and CGAtomIdx orders in sync, and don't + // force a complex reordering of the atoms when we commit + atom.reindex(last_atomidx + 1); + // reparent this atom to the CutGroup for this residue atom.reparent(cgidx); diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst index 6af579eef..ac86ad415 100644 --- a/doc/source/changelog.rst +++ b/doc/source/changelog.rst @@ -55,6 +55,19 @@ organisation on `GitHub `__. * Added a map option (fix_perturbable_zero_sigmas) to prevent perturbation of the Lennard-Jones sigma parameter for ghost atoms during alchemical free energy simulations. +* [CHANGE IN BEHAVIOUR] - added code that ensures that, when editing molecules, + the CGAtomIdx order will always follow the AtomIdx order of atoms. This is + because a lot of code had implicitly assumed this, and so it was a cause + of bugs when this wasn't the case. Now, when you edit a molecule, on committing, + the orders will be checked. If they don't agree, then the CutGroups will be + reordered, with atoms reordered as necessary to make the CGAtomIdx order match + the AtomIdx order. If this isn't possible (e.g. because atoms in CutGroups + are not contiguous), then the molecule will be converted to a single-cutgroup + molecule, with the atoms placed in AtomIdx order. As part of this change, + the merge code will now also ensure that added atoms are added with the + correct AtomIdx, rather than added as the last atoms in the molecule. This + is also more natural. This fixes issue #202. + * Please add an item to this changelog when you create your PR `2024.1.0 `__ - April 2024