Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prep v0.6.0 #226

Merged
merged 9 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
include README.md
include LICENSE
include test/*
include example/*
include meeko/data/*
include meeko/tmp/*
include meeko/utils/*
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Meeko: preparation of small molecules for AutoDock

[![API stability](https://img.shields.io/badge/stable%20API-no-orange)](https://shields.io/)
[![PyPI version fury.io](https://img.shields.io/badge/version-0.6.0--alpha.3-green.svg)](https://pypi.python.org/pypi/meeko/)
[![PyPI version fury.io](https://img.shields.io/badge/version-0.6.0-green.svg)](https://pypi.python.org/pypi/meeko/)

Meeko reads an RDKit molecule object and writes a PDBQT string (or file)
for [AutoDock-Vina](https://github.com/ccsb-scripps/AutoDock-Vina)
Expand Down Expand Up @@ -293,7 +293,6 @@ naming scheme for atoms and residues, e.g., `HIE` or `HID`
instead of `HIS`.

### 2. Prepare protein pdbqt
Here, `wk.pdb` was written by waterkit.
Here, `wk.pdb` was written by waterkit. The example below will center a gridbox of specified size on the given reactive residue.

```console
Expand Down
2 changes: 1 addition & 1 deletion meeko/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# Meeko
#

__version__ = "0.6.0-alpha.3"
__version__ = "0.6.0"

try:
import prody
Expand Down
11 changes: 10 additions & 1 deletion meeko/analysis/fingerprint_interactions.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,12 @@
import os

import numpy as np
import pandas as pd
try:
import pandas as pd
_got_pandas = True
except ImportError as error:
_got_pandas = False
_pandas_import_error = error

from .interactions import Hydrophobic, Reactive, Metal
from .interactions import HBDonor, HBAcceptor, WaterDonor, WaterAcceptor
Expand Down Expand Up @@ -89,6 +94,10 @@ def to_dataframe(self, remove_common=False):
found between the molecules and the receptor

"""

if not _got_pandas:
raise _pandas_import_error

count = 0
resid_to_idx_encoder = {}
columns = [[], []]
Expand Down
Empty file added meeko/cli/__init__.py
Empty file.
159 changes: 159 additions & 0 deletions meeko/cli/mk_export.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
#

import argparse
import gzip
import pathlib
import sys
import warnings
import copy
import numpy as np

from rdkit import Chem

from meeko import PDBQTMolecule
from meeko import RDKitMolCreate
from meeko import Polymer
from meeko import export_pdb_updated_flexres
from meeko.utils.utils import parse_begin_res


def cmd_lineparser():
parser = argparse.ArgumentParser(
description='Export docked ligand to SDF, and receptor to PDB',
)
parser.add_argument(dest='docking_results_filename', nargs = "+",
help='Docking output file(s), either PDBQT \
file from Vina or DLG file from AD-GPU.')
parser.add_argument('-s', '--write_sdf', metavar='output_SDF_filename',
help="defaults to input filename with suffix from --sufix")
parser.add_argument(
'-p',
'--write_pdb',
metavar='output_PDB_filename',
help="defaults to input filename with suffix from --suffix",
)
parser.add_argument("--suffix", default="_docked",
help="suffix for output filesnames that are not explicitly set")
parser.add_argument('-j', '--read_json', metavar='input_JSON_filename',
help="receptor written by mk_prepare_receptor -j/--write_json")
parser.add_argument('--all_dlg_poses', action='store_true',
help="write all AutoDock-GPU poses, not just cluster leads.")
parser.add_argument('-k', '--keep_flexres_sdf', action='store_true',
help="add flexres, if any, to SDF ouput")
parser.add_argument('-', '--', dest='redirect_stdout', action='store_true',
help="do not write SDF file, just print it to STDOUT")
return parser.parse_args()

def main():
args = cmd_lineparser()

docking_results_filenames = args.docking_results_filename
write_sdf = args.write_sdf
write_pdb = args.write_pdb
read_json = args.read_json
suffix = args.suffix
all_dlg_poses = args.all_dlg_poses
keep_flexres_sdf = args.keep_flexres_sdf
redirect_stdout = args.redirect_stdout

if (
(write_sdf is not None or write_pdb is not None) and
len(docking_results_filenames) > 1
):
msg = "With multiple input files, the output filenames are based on the"
msg += "input filename. The suffix can be controlled with option --suffix."
msg += "--write options are incompatible with multiple input files"
print("--write options incompatible with multiple input files", file=sys.stderr)
sys.exit(2)

if redirect_stdout and len(docking_results_filenames) > 1:
print("option -/-- incompatible with multiple input files", file=sys.stderr)
sys.exit(2)

if read_json is not None:
with open(read_json) as f:
json_string = f.read()
polymer = Polymer.from_json(json_string)
else:
polymer = None
if write_pdb is not None:
print("option -p (write pdb) requires -j (receptor receptor file)", file=sys.stderr)
sys.exit(2)


for filename in docking_results_filenames:
is_dlg = filename.endswith('.dlg') or filename.endswith(".dlg.gz")
if filename.endswith(".gz"):
with gzip.open(filename) as f:
string = f.read().decode()
else:
with open(filename) as f:
string = f.read()
mol_name = pathlib.Path(filename).with_suffix("").name
pdbqt_mol = PDBQTMolecule(
string,
name=mol_name,
is_dlg=is_dlg,
skip_typing=True,
)
only_cluster_leads = is_dlg and not all_dlg_poses
sdf_string, failures = RDKitMolCreate.write_sd_string(
pdbqt_mol,
only_cluster_leads=only_cluster_leads,
keep_flexres=keep_flexres_sdf,
)
for i in failures:
warnings.warn("molecule %d not converted to RDKit/SD File" % i)
if sdf_string == "":
warnings.warn("sdf_string does not contain molecular data.")
if redirect_stdout or write_pdb:
pass
else:
print("Output SDF will not be created because there is no pose data for ligand. \n"
+ "Maybe the input poses only contain flexible sidechains and \n"
+ "keep_flexres_sdf is set to False. \n"
+ "Use -k with mk_export.py to retain the flexres and write to output SDF File. ")
sys.exit(2)
if len(failures) == len(pdbqt_mol._atom_annotations["mol_index"]):
msg = "\nCould not convert to RDKit. Maybe meeko was not used for preparing\n"
msg += "the input PDBQT for docking, and the SMILES string is missing?\n"
msg += "Except for standard protein sidechains, all ligands and flexible residues\n"
msg += "require a REMARK SMILES line in the PDBQT, which is added automatically by meeko."
raise RuntimeError(msg)
if not redirect_stdout:
if write_sdf is None:
fn = pathlib.Path(filename).with_suffix("").name + f"{suffix}.sdf"
else:
fn = write_sdf
with open(fn, "w") as f:
f.write(sdf_string)
else:
print(sdf_string)

# write receptor with updated flexres
if read_json is not None:
pdb_string = ""
pose_id_to_iter = [pose.pose_id for pose in pdbqt_mol]
iter_pose = copy.deepcopy(pdbqt_mol)
for pose_id in pose_id_to_iter:
model_nr = pose_id + 1
iter_pose._positions = np.array([pdbqt_mol._positions[pose_id]])
iter_pose._pose_data['n_poses'] = 1 # Update the number of poses to reflect the reduction
iter_pose._current_pose = 0 # Reset to the first (and only) pose
pdb_string += "MODEL " + f"{model_nr:8}" + pathlib.os.linesep
pol_copy = copy.deepcopy(polymer)
pdb_string += export_pdb_updated_flexres(pol_copy, iter_pose)
pdb_string += "ENDMDL" + pathlib.os.linesep
if write_pdb is None:
fn = pathlib.Path(filename).with_suffix("").name + f"{suffix}.pdb"
else:
fn = write_pdb
with open(fn, "w") as f:
f.write(pdb_string)
return

if __name__ == "__main__":
sys.exit(main())
Original file line number Diff line number Diff line change
Expand Up @@ -494,9 +494,7 @@ def get_suffixes(molsetups):
else:
return tuple("mk%d" % (i + 1) for i in range(len(molsetups)))


if __name__ == "__main__":

def main():
args, config, backend, is_covalent = cmd_lineparser()
input_molecule_filename = args.input_molecule_filename

Expand Down Expand Up @@ -656,3 +654,7 @@ def get_suffixes(molsetups):
elif input_mol_with_failure > 0:
print("Some molecules encountered errors!")
sys.exit(4) # partial failure
return

if __name__ == '__main__':
sys.exit(main())
Loading
Loading