forked from beikwx/SailVina
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvina_validator.py
245 lines (215 loc) · 9.46 KB
/
vina_validator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
import os
import sys
import urllib.request
from socket import timeout
from urllib.error import URLError, HTTPError
from xml.dom import minidom
import shutil
from tools.receptor_processor import prepare_receptor, gen_config
from tools.file_processor import mk_output_dir
from tools.format_convertor import sdf_2_pdb_add_mw, pdb_mol2_2_pdbqt, \
extract_pdbqt, ob_noh_xyz, pdbqt_2_pdb
from tools.dock_processor import vina_dock
from tools.read_scores import read_scores
from tools.rmsd import charnley_cal_rmsd
MAX_RMSD = 2.0
MAX_LIGAND = 500
def validate_root_folder(root_folder):
"""
验证一个包含多个受体的目录
:param root_folder: 包含多个受体的目录
:return:
"""
path_file_list = os.listdir(root_folder)
for path_file in path_file_list:
folder_path = os.path.join(root_folder, path_file)
if os.path.isdir(folder_path):
if not validate_folder(folder_path):
print("无法验证%s" % folder_path)
continue
def validate_folder(target_folder):
"""
用于验证一个文件夹中的pdb用vina对接是否可靠。
:param target_folder: 验证的文件夹。里面包含需要的文件。
比如选择D:\3lnk
里面包含
- 3lnk_ligand.sdf:共晶配体文件
- 3lnk_pocket.pdb:位点文件。如果没有则对接整个蛋白
- 3lnk_protein.pdb:蛋白文件
:return:出现错误返回假,完成返回真
"""
if not os.path.isdir(target_folder):
print("选择的必须是一个目录!")
return False
# 创建处理目录
process_folder = os.path.join(target_folder, "process")
mk_output_dir(process_folder)
# 获取其中的文件
input_protein = None
input_ligand = None
input_pocket = None
config_file = []
# 获取pdbid,所选文件夹就是
pdb_id = os.path.split(target_folder)[-1]
if len(pdb_id) != 4:
print("%s不是pdbid!" % pdb_id)
return False
print("----------正在解析文件夹%s----------" % target_folder)
files = os.listdir(target_folder)
for file in files:
if "protein" in file:
input_protein = os.path.join(target_folder, file)
if "ligand" in file:
if file.endswith("sdf") or file.endswith("pdbqt"):
input_ligand = os.path.join(target_folder, file)
if "pocket" in file:
input_pocket = os.path.join(target_folder, file)
if "config" in file:
config_file.append(os.path.join(target_folder, file))
if input_protein is None or input_ligand is None:
print("%s缺少配体或者受体,无法验证!" % target_folder)
return False
if input_pocket is None:
print("%s缺少口袋文件,将搜索整个蛋白进行重对接" % target_folder)
print("输入的受体是%s:" % input_protein)
print("输入的配体是%s:" % input_ligand)
print("输入的口袋是%s:" % input_pocket)
print("----------准备从网络获取pdb信息----------")
# 1.获取pdb信息
# 1.1目标网址https://www.rcsb.org/pdb/rest/describePDB?structureId=xxxx
website = "https://www.rcsb.org/pdb/rest/describePDB?structureId=%s" % pdb_id
try:
response = urllib.request.urlopen(website, timeout=10)
except HTTPError as e:
print("服务器无法处理请求!")
print("错误代码: ", e.code)
print("----------无法获取pdb信息------")
pdb_title = pdb_id
pdb_keywords = "not_found"
print("该pdb的标题是:%s" % pdb_title)
print("该pdb的关键词是:%s" % pdb_keywords)
except URLError as e:
print("无法连接到服务器!")
print("原因: ", e.reason)
print("----------无法获取pdb信息------")
pdb_title = pdb_id
pdb_keywords = "not_found"
print("该pdb的标题是:%s" % pdb_title)
print("该pdb的关键词是:%s" % pdb_keywords)
except timeout:
print("连接服务器超时!")
print("----------无法获取pdb信息------")
pdb_title = pdb_id
pdb_keywords = "not_found"
print("该pdb的标题是:%s" % pdb_title)
print("该pdb的关键词是:%s" % pdb_keywords)
else:
xml_data = response.read().decode("utf-8") # 1.2解析xml文件
dom = minidom.parseString(xml_data)
pdb_node = dom.documentElement.getElementsByTagName('PDB')[0]
pdb_title = pdb_node.getAttribute("title")
pdb_keywords = pdb_node.getAttribute("keywords")
print("该pdb的标题是:%s" % pdb_title)
print("该pdb的关键词是:%s" % pdb_keywords)
# 2.进行对接
# 2.1准备受体
preped_protein = os.path.join(process_folder, "preped.pdbqt")
if input_protein.endswith(".pdbqt"):
print("----------发现准备过的受体----------")
shutil.copy(input_protein, preped_protein)
else:
print("----------准备受体----------")
result = prepare_receptor(input_protein, preped_protein, "None", 0, 1, 1, 1, 1)
if not result:
print("----------准备受体失败,受体可能存在问题,请修复后重新尝试----------")
return False
# 2.2准备配体
preped_ligand = os.path.join(process_folder, "ligand.pdbqt")
pdb_ligand = None
if input_ligand.endswith(".pdbqt"):
print("----------发现准备过的配体----------")
shutil.copy(input_ligand, preped_ligand)
else:
print("----------准备配体----------")
pdb_ligand = os.path.join(process_folder, "ligand.pdb")
mw = sdf_2_pdb_add_mw(input_ligand, pdb_ligand)
if mw > MAX_LIGAND:
print("所选分子的分子量为%f大于%f" % (mw, MAX_LIGAND))
return False
pdb_mol2_2_pdbqt(pdb_ligand, preped_ligand)
# 2.3准备config文件
if len(config_file) == 0:
if input_pocket is None:
print("----------准备config文件----------")
gen_config(preped_protein, preped_ligand)
else:
print("----------检测到口袋,准备口袋------")
preped_pocket = os.path.join(process_folder, "preped_pocket.pdbqt")
prepare_receptor(input_pocket, preped_pocket, "None", 0, 1, 1, 1, 1)
print("----------准备config文件----------")
gen_config(preped_pocket, preped_ligand)
else:
print("----------检测到config文件----------")
for f in config_file:
file_name = os.path.split(f)[-1]
target_config_file = os.path.join(process_folder, file_name)
shutil.copy(f, target_config_file)
# 2.4进行对接
configs = os.listdir(process_folder)
output_folder = os.path.join(target_folder, "Output")
mk_output_dir(output_folder)
for config in configs:
if "config" in config:
config_file = os.path.join(process_folder, config)
dock_output_file = os.path.join(output_folder, pdb_id + "_" + str(config[:-4]) + ".pdbqt")
vina_dock(preped_ligand, preped_protein, config_file, dock_output_file)
# 3.输出结果文件
extract_folder = os.path.join(target_folder, "Extract")
mk_output_dir(extract_folder)
dock_output_files = os.listdir(output_folder)
for dock_output_file in dock_output_files:
# 3.1切割对接结果文件
pdbqt_file = os.path.join(output_folder, dock_output_file)
extract_pdbqt(pdbqt_file, extract_folder, 0)
# 3.2读取切割的每个文件的分数,同时计算RMSD
xyz_folder = os.path.join(target_folder, "XYZ")
mk_output_dir(xyz_folder)
print("----------转换原始配体文件格式----------")
xyz_ligand = os.path.join(xyz_folder, "ligand.xyz")
if pdb_ligand is None:
pdb_ligand = preped_ligand[:-2]
pdbqt_2_pdb(preped_ligand, pdb_ligand)
ob_noh_xyz(pdb_ligand, xyz_ligand)
print("----------输出报告---------")
report_file = os.path.join(target_folder, "report.txt")
last_validate_folder = os.path.join(target_folder, "Validate")
mk_output_dir(last_validate_folder)
with open(report_file, "w") as f:
f.writelines("Title:\t%s\n" % pdb_title)
f.writelines("Keywords:\t%s\n" % pdb_keywords)
f.writelines("Ligand_name\tscores\trmsd\n")
for output_ligand in os.listdir(extract_folder):
if output_ligand.endswith(".pdbqt"):
ligand_name = output_ligand[:-6]
sec_ligand = os.path.join(extract_folder, output_ligand)
# 读取分数
score = read_scores(sec_ligand)[0]
# 转换格式
sec_pdb_ligand = os.path.join(xyz_folder, ligand_name + ".pdb")
sec_xyz_ligand = os.path.join(xyz_folder, ligand_name + ".xyz")
pdbqt_2_pdb(sec_ligand, sec_pdb_ligand)
ob_noh_xyz(sec_pdb_ligand, sec_xyz_ligand)
# 删除中间体
os.remove(sec_pdb_ligand)
# 计算rmsd
rmsd = charnley_cal_rmsd(xyz_ligand, sec_xyz_ligand, "none", "hungarian")
# RMSD达到要求,复制文件到Validate目录
if float(rmsd) <= MAX_RMSD:
dst = os.path.join(last_validate_folder, ligand_name + ".pdbqt")
shutil.copy(sec_ligand, dst)
f.writelines("%s\t%s\t%s\n" % (ligand_name, score, rmsd))
print("----------验证%s结束----------" % target_folder)
return True
if __name__ == '__main__':
root_path = sys.argv[1]
validate_root_folder(root_path)