-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpharmacophore_reward.py
158 lines (139 loc) · 6.28 KB
/
pharmacophore_reward.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import collections
import itertools
import os
import numpy as np
from rdkit import RDConfig, Chem, Geometry
from rdkit.Chem import ChemicalFeatures, rdDistGeom, rdMolTransforms
from rdkit.Chem.Pharm3D import EmbedLib, Pharmacophore
from rdkit.Numerics import rdAlignment
from chemtsv2.reward import Reward
def create_pharmacophore():
fdef = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
feature_factory = ChemicalFeatures.BuildFeatureFactory(fdef)
# 使用提取的药效团信息替换 cluster_centers_sel
# 只保留 Atom ID 1, 14, 16, 20, 29 的信息
cluster_centers_sel = {
'donors': [
(-32.824, 34.567, 36.889), # Atom ID 7: Donor
],
'acceptors': [
(-36.192, 27.965, 27.899), # Atom ID 22: Acceptor
],
'hydrophobics': [
(-31.097, 35.854, 33.583), # Atom ID 2: Hydrophobics
(-33.827, 36.111, 35.259), # Atom ID 6: Hydrophobics
(-35.333, 33.094, 31.960), # Atom ID 16: Hydrophobics
(-36.644, 30.083, 28.983), # Atom ID 20: Hydrophobics
],
'aromatics': [
(-35.817, 28.984, 28.793), # 中心点 1 (原子 19, 20, 21, 22, 23, 27
(-34.484, 31.647, 31.356), # 中心点 2 (原子 15, 16, 11, 10, 18, 19)
],
}
type_dict = {
'donors': 'Donor',
'acceptors': 'Acceptor',
'hydrophobics': 'Hydrophobe',
'aromatics': 'Aromatic',
}
radi_dict = {
'Donor': 0.5,
'Acceptor': 0.5,
'Hydrophobe': 1.0,
'Aromatic': 1.0,
}
ph4Feats = []
for ptype, coords in cluster_centers_sel.items():
for coord in coords:
feat = ChemicalFeatures.FreeChemicalFeature(
type_dict[ptype],
Geometry.Point3D(coord[0], coord[1], coord[2])
)
ph4Feats.append(feat)
pcophore = Pharmacophore.Pharmacophore(ph4Feats)
#print(pcophore)
bmatrix = pcophore._boundsMat
for i, j in zip(*np.where(np.triu(bmatrix) > 0)):
i_type = pcophore.getFeature(i).GetFamily()
j_type = pcophore.getFeature(j).GetFamily()
pcophore.setUpperBound(i, j, bmatrix[i, j] + (radi_dict[i_type] + radi_dict[j_type]))
for i, j in zip(*np.where(np.tril(bmatrix) > 0)):
i_type = pcophore.getFeature(i).GetFamily()
j_type = pcophore.getFeature(j).GetFamily()
pcophore.setLowerBound(i, j, max(bmatrix[i, j] - (radi_dict[i_type] + radi_dict[j_type]), 0))
#print(pcophore)
return pcophore, feature_factory
PCOPHORE, FEATURE_FACTORY = create_pharmacophore()
def get_transform_matrix(alignRef, confEmbed, atomMatch):
"""
Author: Greg Landrum, Date: Nov 4, 2016
URL: https://github.com/rdkit/UGM_2016/blob/master/Notebooks/Stiefl_RDKitPh4FullPublication.ipynb
"""
alignProbe = []
for matchIds in atomMatch:
dummyPoint = Geometry.Point3D(0.0, 0.0, 0.0)
for mid in matchIds:
dummyPoint += confEmbed.GetAtomPosition(mid)
dummyPoint /= len(matchIds)
alignProbe.append(dummyPoint)
ssd, transformMatrix = rdAlignment.GetAlignmentTransform(alignRef, alignProbe)
return ssd, transformMatrix
def transform_embeddings(pcophore, embeddings, atomMatch):
"""
Author: Greg Landrum, Date: Nov 4, 2016
URL: https://github.com/rdkit/UGM_2016/blob/master/Notebooks/Stiefl_RDKitPh4FullPublication.ipynb
"""
alignRef = [f.GetPos() for f in pcophore.getFeatures()]
ssds = []
for embedding in embeddings:
conformer = embedding.GetConformer()
ssd, transformMatrix = get_transform_matrix(alignRef, conformer, atomMatch)
rdMolTransforms.TransformConformer(conformer, transformMatrix)
ssds.append(ssd)
return ssds
class Pharmacophore_reward(Reward):
def get_objective_functions(conf):
def PharmacophoreSearch(mol):
can_match, all_matches = EmbedLib.MatchPharmacophoreToMol(mol, FEATURE_FACTORY, PCOPHORE)
ph4_feat_counter = collections.Counter([f.GetFamily() for f in PCOPHORE.getFeatures()])
if can_match:
lig_feat_counter = collections.Counter([f.GetFamily() for f in itertools.chain.from_iterable(all_matches)])
match_list = [f"{k}:{lig_feat_counter[k]}:{ph4_feat_counter[k]}" for k in ph4_feat_counter.keys()]
else:
lig_feat_counter = collections.Counter([f.GetFamily() for f in FEATURE_FACTORY.GetFeaturesForMol(mol)])
match_list = [f"{k}:{lig_feat_counter[k]}:{ph4_feat_counter[k]}" for k in ph4_feat_counter.keys()]
return [match_list, None]
bm = rdDistGeom.GetMoleculeBoundsMatrix(mol)
failed, _, matched, _ = EmbedLib.MatchPharmacophore(all_matches, bm, PCOPHORE, useDownsampling=False)
if failed:
return [match_list, None]
atom_match = [list(x.GetAtomIds()) for x in matched]
molH = Chem.AddHs(mol)
Chem.AssignStereochemistry(molH, force=True, cleanIt=True)
try:
_, embeddings, _ = EmbedLib.EmbedPharmacophore(molH, atom_match, PCOPHORE, count=20, randomSeed=42, silent=True)
except ValueError as e:
return [match_list, None]
ssds = transform_embeddings(PCOPHORE, embeddings, atom_match)
if len(ssds) == 0:
return [match_list, None]
best_fit_idx = min(enumerate(ssds), key=lambda x:x[1])[0]
embeddings[best_fit_idx].SetDoubleProp('ssd', ssds[best_fit_idx])
output_conf_dir = os.path.join(conf['output_dir'], 'conformer')
os.makedirs(output_conf_dir, exist_ok=True)
writer = Chem.SDWriter(os.path.join(output_conf_dir, f"mol_{conf['gid']}.sdf"))
writer.write(embeddings[best_fit_idx])
writer.close()
return [match_list, ssds[best_fit_idx]]
return [PharmacophoreSearch]
def calc_reward_from_objective_values(values, conf):
match_list, ssd = values[0]
ligf_total = 0
ph4f_total = 0
for m in match_list:
_, ligf_cnt, ph4f_cnt = m.split(":")
ph4f_total += int(ph4f_cnt)
ligf_total += min(int(ligf_cnt), int(ph4f_cnt))
feat_match_ratio = ligf_total / ph4f_total
ssd_score = 0 if ssd is None else 1 / (1 + ssd)
return (1*feat_match_ratio + 4*ssd_score) / 5