-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathconfig.py
158 lines (142 loc) · 5.59 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
######################################################################################
# Author: Srijan Verma, BITS Pilani, India #
# Code developed in Sirimulla Research Group (http://sirimullaresearchgroup.com/) #
# University of Texas at El Paso, Tx, USA #
# Last modified: 25/08/2020 #
######################################################################################
from rdkit.Chem import MACCSkeys, AllChem
from rdkit.Avalon import pyAvalonTools as fpAvalon
from rdkit.Chem import rdMolDescriptors
import tempfile, os
import shutil
from rdkit.ML.Descriptors import MoleculeDescriptors
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem, Descriptors
# RDKit descriptor calculator, built from the first element of every entry
# in Descriptors._descList.
_descriptor_names = [entry[0] for entry in Descriptors._descList]
calc = MoleculeDescriptors.MolecularDescriptorCalculator(_descriptor_names)
# Function for generating TPATF features, using Mayachem tools
def get_tpatf(m):
    """Generate TPATF features for a molecule using MayaChemTools.

    The molecule is written to an SDF file in a temporary directory, the
    MayaChemTools Perl script
    ``TopologicalPharmacophoreAtomTripletsFingerprints.pl`` is run on that
    file, and the feature values are parsed from the ``temp.csv`` file the
    script writes next to it. The temporary directory is always removed,
    whether or not the computation succeeds.

    Args:
        m: RDKit molecule. 2D coordinates are computed on it in place
            before it is written out.

    Returns:
        ``numpy.ndarray`` of dtype ``float32`` with shape
        ``(1, n_features)``.

    Raises:
        RuntimeError: If the Perl script cannot be run, its CSV output is
            missing or malformed, or the CSV contains no ``Cmpd`` record.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import subprocess

    # Path to perl script (relative to the current working directory)
    script_path = 'mayachemtools/bin/TopologicalPharmacophoreAtomTripletsFingerprints.pl'

    features = None
    # Creates a temp folder
    temp_dir = tempfile.mkdtemp()
    try:
        sdf_path = os.path.join(temp_dir, "temp.sdf")
        csv_path = os.path.join(temp_dir, "temp.csv")

        # Compute 2D coordinates
        AllChem.Compute2DCoords(m)

        # Save sdf file; close the writer so the file is complete on disk
        # before the external script reads it.
        w = Chem.SDWriter(sdf_path)
        try:
            w.write(m)
        finally:
            w.close()

        # Same arguments, in the same order, as the original shell string.
        # Passing a list (no shell) keeps the call correct even if the temp
        # path contains spaces or shell metacharacters.
        command = [
            "perl", script_path,
            "-r", os.path.join(temp_dir, "temp"),
            "--AtomTripletsSetSizeToUse", "FixedSize",
            "-v", "ValuesString",
            "-o", sdf_path,
        ]
        try:
            # The exit status is deliberately not checked (the original
            # ignored it too); a missing or unparsable CSV is the failure
            # signal.
            subprocess.run(command)
            with open(csv_path, 'r') as f:
                for line in f:
                    if "Cmpd" in line:
                        # Values live in the 6th ';'-separated field as a
                        # space-separated list of integers.
                        values = line.split(';')[5].replace('"', '')
                        features = [int(v) for v in values.split()]
        except (OSError, IndexError, ValueError) as err:
            raise RuntimeError(
                "TPATF generation failed: could not run '" + script_path
                + "' or parse its CSV output"
            ) from err
    finally:
        # Delete the temporary directory, even on failure
        shutil.rmtree(temp_dir, ignore_errors=True)

    if features is None:
        raise RuntimeError(
            "TPATF generation failed: no 'Cmpd' record found in the output of '"
            + script_path + "'"
        )

    tpatf_arr = np.array(features, dtype=np.float32)
    return tpatf_arr.reshape(1, tpatf_arr.shape[0])
# Field names of every record, in the order the values are listed per row.
_LOCINFO_FIELDS = (
    "dataset_size",
    "actives",
    "inactives",
    "cohen_k_test",
    "roc_auc",
    "f1_score",
    "Recall",
    "accuracy",
    "Precision",
)

# One row per entry: (entry name, values aligned with _LOCINFO_FIELDS).
_LOCINFO_ROWS = (
    ("ToxDes", (1662, 831, 831, 0.36, 0.68, 0.679, 0.68, 0.68, 0.682)),
    ("ToxFP", (1662, 831, 831, 0.392, 0.696, 0.696, 0.696, 0.696, 0.698)),
    ("ToxTopo", (1662, 831, 831, 0.368, 0.684, 0.684, 0.684, 0.684, 0.684)),
    ("ActFP", (736, 368, 368, 0.392, 0.696, 0.695, 0.696, 0.696, 0.698)),
    ("ActDes", (680, 340, 340, 0.216, 0.608, 0.606, 0.608, 0.608, 0.609)),
    ("ActTopo", (680, 340, 340, 0.294, 0.647, 0.647, 0.647, 0.647, 0.647)),
)

# Single-element list wrapping a dict that maps each entry name to its
# record of dataset counts and evaluation scores.
LocInfo_dict = [
    {
        entry_name: dict(zip(_LOCINFO_FIELDS, entry_values))
        for entry_name, entry_values in _LOCINFO_ROWS
    }
]
# Bit-vector sizes: default length and the length used by the "long" variants.
nbits = 1024
longbits = 16384

# Maps a fingerprint name to a one-argument callable that takes a molecule
# `m` and returns that fingerprint / feature vector. The lambdas read
# `nbits` / `longbits` (and `get_tpatf` / `calc`) when they are called, not
# when this dict is built.
fpFunc_dict = {
    # Morgan fingerprints, radius 0-3
    'ecfp0': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 0, nBits=nbits),
    'ecfp2': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 1, nBits=nbits),
    'ecfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=nbits),
    'ecfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, nBits=nbits),
    # Morgan fingerprints with useFeatures=True
    'fcfp2': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 1, useFeatures=True, nBits=nbits),
    'fcfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, useFeatures=True, nBits=nbits),
    'fcfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, useFeatures=True, nBits=nbits),
    # Long (longbits-sized) Morgan variants
    'lecfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, nBits=longbits),
    'lecfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, nBits=longbits),
    'lfcfp4': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 2, useFeatures=True, nBits=longbits),
    'lfcfp6': lambda m: AllChem.GetMorganFingerprintAsBitVect(m, 3, useFeatures=True, nBits=longbits),
    # MACCS keys, hashed atom-pair and hashed topological-torsion fingerprints
    'maccs': lambda m: MACCSkeys.GenMACCSKeys(m),
    'hashap': lambda m: rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(m, nBits=nbits),
    'hashtt': lambda m: rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(m, nBits=nbits),
    # Avalon fingerprints, default and long
    'avalon': lambda m: fpAvalon.GetAvalonFP(m, nbits),
    'laval': lambda m: fpAvalon.GetAvalonFP(m, longbits),
    # RDKit fingerprints with maxPath 5-7
    'rdk5': lambda m: Chem.RDKFingerprint(m, maxPath=5, fpSize=nbits, nBitsPerHash=2),
    'rdk6': lambda m: Chem.RDKFingerprint(m, maxPath=6, fpSize=nbits, nBitsPerHash=2),
    'rdk7': lambda m: Chem.RDKFingerprint(m, maxPath=7, fpSize=nbits, nBitsPerHash=2),
    # TPATF features and RDKit descriptors
    'tpatf': lambda m: get_tpatf(m),
    'rdkDes': lambda m: calc.CalcDescriptors(m),
}

# Names of the fingerprints built with `longbits`.
long_fps = {'laval', 'lecfp4', 'lecfp6', 'lfcfp4', 'lfcfp6'}

# Fingerprint names selected for generation, in this order.
fps_to_generate = [
    'fcfp4',
    'fcfp2',
    'lecfp4',
    'lfcfp4',
    'rdkDes',
    'tpatf',
    'rdk5',
    'hashtt',
    'avalon',
    'laval',
    'rdk7',
    'ecfp4',
    'hashap',
    'lecfp6',
    'maccs',
]

# Starts empty; nothing in this block populates it.
ModFileName_LoadedModel_dict = {}