generated from Quantum-Accelerators/template
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
6,451 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!/bin/tcsh | ||
|
||
######################################################################### | ||
# # | ||
# This script submits slurm jobs for ligand+protein and the ligand only # | ||
# calculations. # | ||
# # | ||
######################################################################### | ||
|
||
|
||
#Call to the energy calculations for the ligand+protein and the ligand | ||
set nSplits = `ls -ltr moleculeLists/fiveSplits/file* | wc` | ||
|
||
foreach i(`seq -w 0 $nSplits`) | ||
sed "s/XX/$i/g" run_template.slurm > run$i.slurm | ||
sbatch run$i.slurm | ||
end | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
import os, glob, sys | ||
from rdkit import Chem | ||
from multiprocessing import Pool | ||
from rdkit import Chem | ||
from rdkit.Chem import AllChem | ||
import numpy as np | ||
|
||
########################################################################### | ||
# # | ||
# This program takes in one argument for the file index for the file # | ||
# inside the directory called moleculeLists.txt named something like # | ||
# file000, file001 etc. that contains the names of five drugs. The energy # | ||
# calculations for these drugs is submitted together. This way, I # | ||
# parallelize the calculations with brute force separate job submitions. # | ||
# # | ||
########################################################################### | ||
|
||
molList = open('moleculeLists/fiveSplits/file' + sys.argv[1] + '.txt', 'r').readlines() | ||
molList = [mol.strip() for mol in molList] | ||
enzymeFile = '../../3sxr_dasatinib_removed.pdb' | ||
|
||
os.chdir('rDock_inputs/') | ||
for ligandName in molList: | ||
os.chdir(ligandName) | ||
suppl = Chem.SDMolSupplier(ligandName + '_docking_out_sorted.sd') | ||
mols = [x for x in suppl] | ||
m1 = mols[-1] | ||
m2 = Chem.rdmolfiles.MolFromPDBFile(enzymeFile) | ||
|
||
#Generating the ligand + protein geometry | ||
combo = Chem.CombineMols(m1, m2) | ||
combo = Chem.AddHs(combo, addCoords=True) | ||
Chem.rdmolfiles.MolToPDBFile(combo, 'combo.pdb') | ||
|
||
#Calculating the energy for the (ligand + protein) geometry | ||
#As I say in the other document, xtb did not work and I ended up using rdkit | ||
#for this step | ||
#os.system('xtb --gfnff combo.xyz > combo.log') | ||
res = AllChem.MMFFOptimizeMoleculeConfs(combo, maxIters=0, numThreads=0) | ||
|
||
#Calculating the energies for the ligand only geometry | ||
m1 = Chem.AddHs(m1, addCoords=True) | ||
res1 = AllChem.MMFFOptimizeMoleculeConfs(m1, maxIters=0, numThreads=0) | ||
|
||
#Saving the energies for the (ligand + protein) and the ligand only geometry in a file | ||
np.savetxt('energies.txt', [res[0][1], res1[0][1]]) | ||
os.chdir('../') | ||
os.chdir('../') | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
import os, glob, re, sys | ||
import numpy as np | ||
|
||
####################################################################### | ||
# # | ||
# This script extracts the energies for the three components required # | ||
# in the binding energy calculations, which are (protein + ligand), # | ||
# protein, and ligand energies. It calculates the binding energies # | ||
# for all the drug molecules and prints them in kcal/mol. # | ||
# # | ||
####################################################################### | ||
|
||
|
||
os.chdir('moleculeLists') | ||
molList = open('fileList.txt', 'r').readlines() | ||
molList = [mol.strip() for mol in molList] | ||
os.chdir('../') | ||
|
||
e1 = np.loadtxt('energy_protein/energies.txt') | ||
|
||
os.chdir('rDock_inputs') | ||
for mol in molList: | ||
os.chdir(mol) | ||
try: | ||
eCombined = np.loadtxt('energies.txt') | ||
print("%s, %3f, %3f, %3f, %3f, %3f" % (mol, eCombined[0], eCombined[1], e1, (eCombined[0] - eCombined[1] - e1), (eCombined[0] - eCombined[1] - e1)*627.503)) | ||
os.chdir('../') | ||
except: | ||
os.chdir('../') | ||
continue | ||
print(mol, 'skipped due to some error') | ||
os.chdir('../') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#!/bin/tcsh | ||
|
||
################################################################# | ||
# # | ||
# This script extracts the docking scores for all the drugs and # | ||
# sorts them. # | ||
# # | ||
################################################################# | ||
|
||
rm -f allScores.txt | ||
|
||
cd rDock_inputs/ | ||
foreach mol(`cat ../moleculeLists/fileList.txt | xargs`) | ||
cd $mol | ||
set score = `grep -iwA1 "<SCORE>" "$mol"_docking_out_sorted.sd | tail -1` | ||
echo "$mol $score" >> ../../allScores.txt | ||
cd .. | ||
end | ||
cd .. | ||
|
||
sort -k2g allScores.txt > sorted_allScores.txt | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#This script extracts the smiles from the file named as drugs.txt | ||
import os | ||
def getSmilesFromFile(fileName): | ||
INF = open(fileName,'r').readlines() | ||
tmp1 = [i.strip().split() for i in INF][1:] | ||
molNames, smiles = [], [] | ||
for i in tmp1: | ||
if (i[-1] == 'TRUE') or (i[-1] == 'FALSE'): | ||
continue | ||
else: | ||
if len(i) > 3: | ||
molNames.append('_'.join([j.replace('/','-') for j in i[:-2]])) | ||
smiles.append(i[-1]) | ||
elif len(i) == 3: | ||
molNames.append(i[0]) | ||
smiles.append(i[2]) | ||
else: | ||
print('something wrong with ', i, '.') | ||
|
||
#Writing the names of molecules in a file inside a separate directory | ||
#for future calculations | ||
if not os.path.exists('moleculeLists'): | ||
os.mkdir('moleculeLists') | ||
os.chdir('moleculeLists') | ||
with open("fileList.txt", 'w') as file: | ||
file.write('\n'.join(molNames)) | ||
|
||
return [molNames, smiles] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
import getSmilesFromFile | ||
from rdkit import Chem | ||
from rdkit.Chem import AllChem | ||
import os | ||
import xyzFromSmiles | ||
|
||
############################################################ | ||
# # | ||
# This program generates the xyz coordinates for the drugs # | ||
# molecules and the rdock inputs # | ||
# # | ||
############################################################ | ||
|
||
#Extracting the names and the smiles for all the drug molecules | ||
molNames, smiles = getSmilesFromFile.getSmilesFromFile('drugs.txt') | ||
|
||
#Creating a separate directory for the docking calculations | ||
if not os.path.exists('rDock_inputs'): | ||
os.mkdir('rDock_inputs') | ||
os.chdir('rDock_inputs') | ||
|
||
#Loading a prm file template to be edited later for each molecule | ||
f = open('prm-template.prm', 'r').read() | ||
|
||
for i in range(len(molNames)): | ||
#Creating a separate directory for each drug's calculation | ||
if not os.path.exists(molNames[i]): | ||
os.mkdir(molNames[i]) | ||
os.chdir(molNames[i]) | ||
|
||
#Generating the xyz coordinates from smiles for the drug molecule | ||
#and writing to a file in the sdf format | ||
xyzFromSmiles.xyzFromSmiles(smiles[i], molNames[i]) | ||
|
||
#Generate prm file with the use of the template loaded earlier | ||
f1 = f.replace('YYYYY', molNames[i]) | ||
file1 = open(molNames[i] + '_rdock.prm', "w") | ||
file1.write(f1) | ||
file1.close() | ||
|
||
os.chdir('../') | ||
os.chdir('../') | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
#!/bin/bash | ||
|
||
#Activating a conda environment that contains rdock | ||
conda activate docking-rdock | ||
|
||
####################################################################### | ||
# # | ||
# The python program main.py below does the preprocessing on the data # | ||
# provided inside the file drugs.txt. It extracts smiles format and # | ||
# generats the xyz coordinates for each molecule. It also creates the # | ||
# prm files for each them and stores them in separate directories for # | ||
# the docking calculations. # | ||
# # | ||
####################################################################### | ||
python main.py | ||
|
||
source deactivate | ||
|
||
#Generating a mol2 file for the receptor from the user-provided pdb | ||
babel -ipdb 3sxr_dasatinib_removed.pdb -omol2 receptorFile.mol2 | ||
|
||
######################################################################## | ||
# # | ||
# A serial implementation is written below and hence it should only be # | ||
# used for small number of input drugs molecules. I ended up splitting # | ||
# the calculations into multiple parts. # | ||
# # | ||
######################################################################## | ||
|
||
cd rDock_inputs | ||
for mol in $( ls -d1 * ); do | ||
cd $mol | ||
echo "$mol" | ||
rm *log *docking*sd *.as *.mol2 | ||
sed "s/YYYYY/$mol/g" ../../prm-template.prm > "$mol"_rdock.prm | ||
cp ../../receptorFile.mol2 "$mol"_rdock.mol2 | ||
rbcavity -r "$mol"_rdock.prm -W > "$mol"_cavity.log | ||
rbdock -r "$mol"_rdock.prm -p dock.prm -n 10 -i "$mol".sd -o "$mol"_docking_out > "$mol"_docking_out.log | ||
sdsort -n -f'SCORE' "$mol"_docking_out.sd > "$mol"_docking_out_sorted.sd | ||
cd .. | ||
done | ||
cd .. | ||
|
||
#Extracting the docking scores for all the drug molecules | ||
./getScores.tcsh | ||
|
||
############################################################### | ||
# # | ||
# Calculating the binding energy for protein ligand complexes # | ||
# # | ||
############################################################### | ||
|
||
#Splitting the drugs into sets of 5 to parallelize the energy calculation process | ||
mkdir -p moleculeLists | ||
cd moleculeLists | ||
mkdir -p fiveSplits | ||
cd fiveSplits | ||
split -n l/5 --suffix-length=3 --additional-suffix=.txt --numeric-suffixes ../fileList.txt file | ||
cd .. | ||
cd .. | ||
|
||
#Call to the energy calculations for the ligand+protein and the ligand | ||
./call_combine.tcsh | ||
|
||
#Calling a job submission script for the energy calculations for the protein. | ||
#It requires the user to create a directory named energy_protein with the | ||
#script protein.py and a slurm script run.slurm to be present in that directory. | ||
#This step can obviously be done cleanly but this is how I did it. | ||
cd energy_protein | ||
sbatch run.slurm | ||
cd .. | ||
|
||
#Call to energy grepping python script | ||
python getEnergy.py > all_energies.txt | ||
sort -k6rg all_energies.txt | column -t > sorted_energies.txt | ||
|