Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge multiprocessing #78

Open
wants to merge 13 commits into
base: develop
Choose a base branch
from
2 changes: 2 additions & 0 deletions meeko/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from .reactive import reactive_typer
from .reactive import get_reactive_config
from .writer import PDBQTWriterLegacy
from .parallelizer import Parallelizer
from . import analysis

__all__ = ['MoleculePreparation', 'RDKitMoleculeSetup',
Expand All @@ -43,6 +44,7 @@
'reactive_typer',
'get_reactive_config',
'gridbox',
"Parallelizer",
]

if _has_openbabel:
Expand Down
3 changes: 2 additions & 1 deletion meeko/atomtyper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import os

import logging
import numpy as np

from .utils import utils
Expand Down Expand Up @@ -56,7 +57,7 @@ def _type_atoms(molsetup, atom_params):
for atompar in ensure:
if len(set(ensure[atompar])) > 1:
msg = 'WARNING: %s is modified in multiple smartsgroups: %s' % (atompar, set(ensure[atompar]))
print(msg)
logging.warning(msg)
return


Expand Down
10 changes: 5 additions & 5 deletions meeko/covalentbuilder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from collections import namedtuple
import logging
from rdkit import Chem
from rdkit.Chem import AllChem, rdGeometry
import prody
Expand Down Expand Up @@ -76,12 +77,11 @@ def _generate_prody_selection(self, residue):
sel_string.append("resnum %s" % res_num)
sel_string.append("(name %s or name %s)" % (atname1, atname2))
sel_string = " and ".join(sel_string)
print("CovalentBuilder> searching for residue:",sel_string)
logging.info("CovalentBuilder> searching for residue:",sel_string)
found = self.rec.select( sel_string )
if found is None:
print("ERROR: no residue found with the following specification: chain[%s] residue[%s] number[%s] atom names [%s,%s]"% (
raise ValueError("ERROR: no residue found with the following specification: chain[%s] residue[%s] number[%s] atom names [%s,%s]"% (
chid, res_type, res_num, atname1, atname2))
raise ValueError
return (found, atname1, atname2)
# returnsout

Expand Down Expand Up @@ -109,7 +109,7 @@ def _compact_selection(self, sel_info, allow_missing=False):
if None in self.residues[res_id]:
c,r,n = res_id
f = [ x['atname'] if not x is None else "None" for x in self.residues[res_id]]
print("WARNING: one or more atoms are missing in residue %s:%s%d (requested: %s,%s | found: %s,%s)" % (c,r,n,at1,at2, f[0], f[1]) )
logging.warning("WARNING: one or more atoms are missing in residue %s:%s%d (requested: %s,%s | found: %s,%s)" % (c,r,n,at1,at2, f[0], f[1]) )
if not allow_missing:
raise ValueError
del self.residues[res_id]
Expand Down Expand Up @@ -150,7 +150,7 @@ def find_smarts(self, mol, smarts, smarts_indices, first_only):
found = mol.GetSubstructMatches(patt)
#print("CovalentBuilder> ligand patterns found: ", found, "[ use only first: %s ]" % first_only)
if len(found)>1 and first_only:
print("WARNING: the specified ligand pattern returned more than one match: [%d] (potential ambiguity?)" % len(found))
logging.warning("WARNING: the specified ligand pattern returned more than one match: [%d] (potential ambiguity?)" % len(found))
for f in found:
#print("CovalentBuilder> processing:", f, "with ", smarts_indices)
indices.append([f[x] for x in smarts_indices])
Expand Down
3 changes: 2 additions & 1 deletion meeko/macrocycle.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os
import sys
from collections import defaultdict
import logging
from operator import itemgetter


Expand Down Expand Up @@ -224,7 +225,7 @@ def get_broken_rings(self, rings, broken_bonds):


def show_macrocycle_scores(self, setup):
print("Warning: not showing macrocycle scores, check implementation.")
logging.warning("Warning: not showing macrocycle scores, check implementation.")
return
if setup is not None:
print("\n==============[ MACROCYCLE SCORES ]================")
Expand Down
3 changes: 2 additions & 1 deletion meeko/molecule_pdbqt.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os
from collections import defaultdict

import logging
import numpy as np
from scipy import spatial

Expand Down Expand Up @@ -676,7 +677,7 @@ def write_pdbqt_file(self, output_pdbqtfilename, overwrite=False, as_model=False
as_model (bool): Add MODEL/ENDMDL keywords to the output PDBQT string (default: False)

"""
print(overwrite and os.path.isfile(output_pdbqtfilename))
logging.info(overwrite and os.path.isfile(output_pdbqtfilename))
if not overwrite and os.path.isfile(output_pdbqtfilename):
raise RuntimeError('Output PDBQT file %s already exists' % output_pdbqtfilename)

Expand Down
31 changes: 16 additions & 15 deletions meeko/molsetup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from copy import deepcopy
from collections import defaultdict, OrderedDict
import json
import logging
import warnings
import sys

Expand Down Expand Up @@ -90,7 +91,7 @@ def add_atom(self, idx=None, coord=np.array([0.0, 0.0,0.0], dtype='float'),
if idx is None:
idx = len(self.coord)
if idx in self.coord and not overwrite:
print("ADD_ATOM> Error: the idx [%d] is already occupied (use 'overwrite' to force)")
logging.error("ADD_ATOM> Error: the idx [%d] is already occupied (use 'overwrite' to force)")
return False
self.set_coord(idx, coord)
self.set_charge(idx, charge)
Expand Down Expand Up @@ -129,7 +130,7 @@ def add_pseudo(self, coord=np.array([0.0,0.0,0.0], dtype='float'), charge=0.0,
"""
idx = self.atom_true_count + len(self.atom_pseudo)
if idx in self.coord and not overwrite:
print("ADD_PSEUDO> Error: the idx [%d] is already occupied (use 'overwrite' to force)")
logging.error("ADD_PSEUDO> Error: the idx [%d] is already occupied (use 'overwrite' to force)")
return False
self.atom_pseudo.append(idx)
# add the pseudoatom information to the atoms
Expand Down Expand Up @@ -489,34 +490,34 @@ def get_smiles_and_order(self):
def show(self):
    """Log a human-readable summary of this molecule setup.

    The header line (with the molecule name) is emitted at INFO level; the
    atom table, the total charge, the atoms that have an interaction vector
    and the bond table are emitted at DEBUG level through the root logger.

    Each table row is one log record built from a single format string with
    lazy %-arguments. Passing the pieces as extra positional arguments, or
    using print-only keywords such as ``end``, is not valid for logging calls.
    """
    separator = "-----+----------------------------+--------+---+----------+--------------- . . . "
    tot_charge = 0

    logging.info(f"Molecule setup for {self.get_mol_name()}\n")
    logging.debug("==============[ ATOMS ]===================================================")
    logging.debug("idx | coords | charge |ign| atype | connections")
    logging.debug(separator)
    for k, v in self.coord.items():
        logging.debug(
            "% 4d | % 8.3f % 8.3f % 8.3f | % 1.3f | %d | % -8s | %s",
            k, v[0], v[1], v[2],
            self.charge[k], self.atom_ignore[k],
            self.atom_type[k],
            self.graph[k],
        )
        tot_charge += self.charge[k]
    logging.debug(separator)
    logging.debug(" TOT CHARGE: %3.3f", tot_charge)

    logging.debug("\n======[ DIRECTIONAL VECTORS ]==========")
    for k in self.coord:
        if k in self.interaction_vector:
            # one record per atom: log records cannot be continued on the same line
            logging.debug("% 4d  %s", k, self.atom_type[k])

    logging.debug("\n==============[ BONDS ]================")
    # For sanity users, we won't show those keys for now
    keys_to_not_show = ['bond_order', 'type']
    for k, v in self.bond.items():
        t = ', '.join('%s: %s' % (i, j) for i, j in v.items() if i not in keys_to_not_show)
        logging.debug("% 8s -  %s", str(k), t)

    # _macrocycle_typer.show_macrocycle_scores(self)

    logging.debug('')



Expand All @@ -533,7 +534,7 @@ def from_mol(cls, mol, keep_chorded_rings=False, keep_equivalent_rings=False,
RDKitMoleculeSetup.warned_not3D = True
if mol.GetNumConformers() > 1 and conformer_id == -1:
msg = "RDKit molecule has multiple conformers. Considering only the first one."
print(msg, file=sys.stderr)
logging.error(msg)
molsetup = cls()
molsetup.mol = mol
molsetup.atom_true_count = molsetup.get_num_mol_atoms()
Expand Down
57 changes: 57 additions & 0 deletions meeko/parallelizer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Meeko multiprocessing manager
#

import platform
from time import sleep
import logging
import queue
import traceback
from .preparation import MoleculePreparation
import sys
from rdkit import Chem

if platform.system() == "Darwin": # mac
import multiprocess as multiprocessing
else:
import multiprocessing


class Parallelizer:
    """Run ligand preparation in a pool of worker processes.

    Workers call ``MoleculePreparation.prep_single_mol`` with
    ``write_output=False`` and send the resulting PDBQT strings back; the
    parent process passes them to ``output``, so all writing happens in a
    single process.

    Counters (``processed_mols``, ``input_mol_skipped``,
    ``input_mol_with_failure``, ``nr_failures``) accumulate across calls to
    ``process_mols``.
    """

    def __init__(self, max_proc, args, output, backend, is_covalent, preparator, covalent_builder) -> None:
        # reserve one core for pdbqt writing, but keep at least one worker:
        # multiprocessing.Pool rejects a process count below 1
        self.n_workers = max(1, max_proc - 1)

        self.args = args
        self.output = output
        self.backend = backend
        self.is_covalent = is_covalent
        self.preparator = preparator
        self.covalent_builder = covalent_builder

        self.processed_mols = 0
        self.input_mol_skipped = 0
        self.input_mol_with_failure = 0
        self.nr_failures = 0

    def _mp_wrapper(self, mol):
        """Prepare a single molecule in a worker without writing any output."""
        return MoleculePreparation.prep_single_mol(
            mol, self.args, self.output, self.backend, self.is_covalent,
            self.preparator, self.covalent_builder, write_output=False)

    def process_mols(self, mol_supplier):
        """Prepare every molecule from ``mol_supplier`` and write the results.

        Results are consumed in completion order (``imap_unordered``), so the
        output order need not match the input order.

        Returns:
            tuple: (input_mol_skipped, input_mol_with_failure, nr_failures)
        """
        # set pickle options to prevent loss of names
        Chem.SetDefaultPickleProperties(Chem.PropertyPickleOptions.MolProps |
                                        Chem.PropertyPickleOptions.PrivateProps)

        # the context manager tears the pool down even if writing raises
        with multiprocessing.Pool(self.n_workers) as pool:
            for bundle in pool.imap_unordered(self._mp_wrapper, mol_supplier):
                self.processed_mols += 1
                # look at validity before unpacking, so a shorter tuple returned
                # for an unreadable molecule is counted as skipped, not a crash
                if not bundle[0]:
                    self.input_mol_skipped += 1
                    continue
                _, this_mol_had_failure, nr_f, output_pdbqts_info = bundle
                for pdbqt_string, name, covLabel_suffix in output_pdbqts_info:
                    self.output(pdbqt_string, name, covLabel_suffix)
                    logging.info(f"Done writing PDBQT for {name}")
                self.input_mol_with_failure += int(this_mol_had_failure)
                self.nr_failures += nr_f

        return self.input_mol_skipped, self.input_mol_with_failure, self.nr_failures
54 changes: 54 additions & 0 deletions meeko/preparation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from inspect import signature
import json
import logging
import os
import pathlib
import sys
Expand Down Expand Up @@ -260,3 +261,56 @@ def write_pdbqt_file(self, pdbqt_filename, add_index_map=None, remove_smiles=Non
with open(pdbqt_filename,'w') as w:
w.write(self.write_pdbqt_string(add_index_map, remove_smiles))

@staticmethod
def prep_single_mol(mol, args, output, backend, is_covalent, preparator, covalent_builder, write_output=True):
    """Prepare one input molecule and write or collect its PDBQT strings.

    Args:
        mol: input molecule; for backend 'rdkit' ``None`` means it failed to
            load, for backend 'ob' a molecule with no atoms does.
        args: options object; ``bad_charge_ok`` is always read, and
            ``tether_smarts`` / ``tether_smarts_indices`` when ``is_covalent``.
        output: callable ``output(pdbqt_string, name, suffix_tuple)`` that
            also provides ``get_suffixes(molsetups)``.
        backend (str): 'rdkit' or 'ob'.
        is_covalent (bool): process the molecule through ``covalent_builder``.
        preparator: object whose ``prepare`` returns the molecule setups.
        covalent_builder: object whose ``process`` yields covalent ligands;
            only used when ``is_covalent`` is true.
        write_output (bool): call ``output`` directly when true, otherwise
            collect the would-be arguments in the returned list.

    Returns:
        tuple: ``(is_valid, this_mol_had_failure, nr_failures, output_pdbqts_info)``.
        The tuple has four items on every path; for an invalid molecule it
        is ``(False, False, 0, [])``.

    Raises:
        ValueError: if ``backend`` is neither 'rdkit' nor 'ob'.
    """
    nr_failures = 0

    # check that molecule was successfully loaded
    if backend == 'rdkit':
        is_valid = mol is not None
    elif backend == 'ob':
        is_valid = mol is not None and mol.NumAtoms() > 0
    else:
        raise ValueError("unknown backend %r (expected 'rdkit' or 'ob')" % (backend,))
    if not is_valid:
        return False, False, 0, []

    this_mol_had_failure = False
    output_pdbqts_info = []

    if is_covalent:
        for cov_lig in covalent_builder.process(mol, args.tether_smarts, args.tether_smarts_indices):
            root_atom_index = cov_lig.indices[0]
            molsetups = preparator.prepare(cov_lig.mol, root_atom_index=root_atom_index, not_terminal_atoms=[root_atom_index])
            res, chain, num = cov_lig.res_id
            suffixes = output.get_suffixes(molsetups)
            for molsetup, suffix in zip(molsetups, suffixes):
                pdbqt_string, success, error_msg = PDBQTWriterLegacy.write_string(molsetup, bad_charge_ok=args.bad_charge_ok)
                if success:
                    pdbqt_string = PDBQTWriterLegacy.adapt_pdbqt_for_autodock4_flexres(pdbqt_string, res, chain, num)
                    name = molsetup.name
                    if write_output:
                        output(pdbqt_string, name, (cov_lig.label, suffix))
                    else:
                        output_pdbqts_info.append((pdbqt_string, name, (cov_lig.label, suffix)))
                else:
                    nr_failures += 1
                    this_mol_had_failure = True
                    logging.error(error_msg)

    else:
        molsetups = preparator.prepare(mol)
        suffixes = output.get_suffixes(molsetups)
        for molsetup, suffix in zip(molsetups, suffixes):
            pdbqt_string, success, error_msg = PDBQTWriterLegacy.write_string(molsetup, bad_charge_ok=args.bad_charge_ok)
            if success:
                name = molsetup.name
                if write_output:
                    output(pdbqt_string, name, (suffix,))
                else:
                    output_pdbqts_info.append((pdbqt_string, name, (suffix,)))
                # NOTE(review): nesting of this call under `if success` is
                # reconstructed from an indentation-stripped diff -- confirm
                molsetup.show()
            else:
                nr_failures += 1
                this_mol_had_failure = True
                logging.error(error_msg)

    return is_valid, this_mol_had_failure, nr_failures, output_pdbqts_info

3 changes: 2 additions & 1 deletion meeko/receptor_pdbqt.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from collections import defaultdict
import json
import logging
from os import linesep as os_linesep
import pathlib
import sys
Expand Down Expand Up @@ -211,7 +212,7 @@ def assign_types_charges(self, residue_params=residue_params):
ok &= ok_
err += err_
if not ok_:
print("did not match %s with template" % str(r_id), file=sys.stderr)
logging.error("did not match %s with template" % str(r_id))
continue
for key in wanted_params:
atom_params[key].extend(params_this_res[key])
Expand Down
11 changes: 11 additions & 0 deletions meeko/utils/rdkitutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class Mol2MolSupplier():
Parameters
sanitize: perform RDKit sanitization of Mol2 molecule"""
def __init__(self, filename, sanitize=True, removeHs=False, cleanupSubstructures=True):
self.filename = filename
self.fp = open(filename, 'r')
self._opts = {'sanitize':sanitize,
'removeHs':removeHs,
Expand All @@ -52,6 +53,16 @@ def __init__(self, filename, sanitize=True, removeHs=False, cleanupSubstructures

def __iter__(self):
return self

def __len__(self):
    """Return the number of molecules in the Mol2 file.

    Counts the lines of ``self.filename`` that contain the
    ``@<TRIPOS>MOLECULE`` record tag. The file is opened separately for
    counting and read line by line, so it is never held in memory as a whole.
    """
    with open(self.filename, 'r') as fp:
        return sum(1 for line in fp if '@<TRIPOS>MOLECULE' in line)


def __next__(self):
""" iterator step """
Expand Down
Loading
Loading