diff --git a/README.md b/README.md index 7963f7d..46b4e3e 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,8 @@ sys.path.insert(0, 'path/to/molearn/src') import molearn ``` +> **Note** +> in case of installation issues, please consult our FAQ [molearn.readthedocs.io](https://molearn.readthedocs.io/en/latest/FAQ.html) ## Usage ## diff --git a/docs/build/.buildinfo b/docs/build/.buildinfo index 257c332..0e86985 100644 --- a/docs/build/.buildinfo +++ b/docs/build/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 3a4be983efd1ad1578e3aba1f56753bc +config: fd34726b2b2ff5441545b10d48d8c981 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/build/.doctrees/analysis.doctree b/docs/build/.doctrees/analysis.doctree index 93e364c..8e88fe4 100644 Binary files a/docs/build/.doctrees/analysis.doctree and b/docs/build/.doctrees/analysis.doctree differ diff --git a/docs/build/.doctrees/data.doctree b/docs/build/.doctrees/data.doctree index 0e78a2e..cb3babe 100644 Binary files a/docs/build/.doctrees/data.doctree and b/docs/build/.doctrees/data.doctree differ diff --git a/docs/build/.doctrees/environment.pickle b/docs/build/.doctrees/environment.pickle index 91b4dfc..ed1bb00 100644 Binary files a/docs/build/.doctrees/environment.pickle and b/docs/build/.doctrees/environment.pickle differ diff --git a/docs/build/.doctrees/faq.doctree b/docs/build/.doctrees/faq.doctree index 03910b2..21fceec 100644 Binary files a/docs/build/.doctrees/faq.doctree and b/docs/build/.doctrees/faq.doctree differ diff --git a/docs/build/.doctrees/index.doctree b/docs/build/.doctrees/index.doctree index c8e13b2..cffc055 100644 Binary files a/docs/build/.doctrees/index.doctree and b/docs/build/.doctrees/index.doctree differ diff --git a/docs/build/.doctrees/loss_functions.doctree b/docs/build/.doctrees/loss_functions.doctree index b550835..acdcf93 100644 Binary files a/docs/build/.doctrees/loss_functions.doctree and b/docs/build/.doctrees/loss_functions.doctree differ diff --git a/docs/build/.doctrees/models.doctree b/docs/build/.doctrees/models.doctree index ce5c2ee..395a795 100644 Binary files a/docs/build/.doctrees/models.doctree and b/docs/build/.doctrees/models.doctree differ diff --git a/docs/build/.doctrees/scoring.doctree b/docs/build/.doctrees/scoring.doctree index 831829c..c025d9e 100644 Binary files a/docs/build/.doctrees/scoring.doctree and b/docs/build/.doctrees/scoring.doctree differ diff --git a/docs/build/.doctrees/trainers.doctree b/docs/build/.doctrees/trainers.doctree index 1dfd005..7abbbf7 100644 Binary files a/docs/build/.doctrees/trainers.doctree and b/docs/build/.doctrees/trainers.doctree differ diff --git a/docs/build/_modules/index.html b/docs/build/_modules/index.html index ffc1175..b495099 100644 --- a/docs/build/_modules/index.html +++ b/docs/build/_modules/index.html @@ -1,18 +1,15 @@ - - +
from ..utils import as_numpy
-[docs]class MolearnGUI(object):
+
+[docs]
+class MolearnGUI:
'''
This class produces an interactive visualisation for data stored in a
:func:`MolearnAnalysis <molearn.analysis.MolearnAnalysis>` object,
@@ -88,11 +87,10 @@ Source code for molearn.analysis.GUI
else:
self.MA = MA
- self.waypoints = [] # collection of all saved waypoints
- self.samples = [] # collection of all calculated sampling points
+ self.waypoints = [] # collection of all saved waypoints
+ self.samples = [] # collection of all calculated sampling points
self.run()
-
def update_trails(self):
'''
@@ -108,14 +106,14 @@ Source code for molearn.analysis.GUI
# update latent space plot
if len(self.samples) == 0:
- self.latent.data[2].x = self.waypoints[:, 0]
- self.latent.data[2].y = self.waypoints[:, 1]
+ if len(self.waypoints)>0:
+ self.latent.data[2].x = self.waypoints[:, 0]
+ self.latent.data[2].y = self.waypoints[:, 1]
else:
self.latent.data[2].x = self.samples[:, 0]
self.latent.data[2].y = self.samples[:, 1]
self.latent.update()
-
def on_click(self, trace, points, selector):
'''
@@ -136,14 +134,13 @@ Source code for molearn.analysis.GUI
# update textbox (triggering update of 3D representation)
try:
pt = self.waypoints.flatten().round(decimals=4).astype(str)
- #pt = np.array([self.latent.data[3].x, self.latent.data[3].y]).T.flatten().round(decimals=4).astype(str)
+ # pt = np.array([self.latent.data[3].x, self.latent.data[3].y]).T.flatten().round(decimals=4).astype(str)
self.mybox.value = " ".join(pt)
except Exception:
return
self.update_trails()
-
def get_samples(self, mybox, samplebox, path):
'''
provide a trail of point between list of waypoints, either connected
@@ -159,8 +156,8 @@ Source code for molearn.analysis.GUI
crd = np.array(mybox.split()).astype(float)
crd = crd.reshape((int(len(crd)/2), 2))
except Exception:
- raise Exception("Cannot define sampling points")
- return
+ raise Exception("Cannot define sampling points")
+ return
if use_path:
# connect points via A*
@@ -168,8 +165,8 @@ Source code for molearn.analysis.GUI
landscape = self.latent.data[0].z
crd = get_path_aggregate(crd, landscape.T, self.MA.xvals, self.MA.yvals)
except Exception as e:
- raise Exception(f"Cannot define sampling points: path finding failed. {e})")
- return
+ raise Exception(f"Cannot define sampling points: path finding failed. {e})")
+ return
else:
# connect points via straight line
@@ -180,7 +177,6 @@ Source code for molearn.analysis.GUI
return
return crd
-
def interact_3D(self, mybox, samplebox, path):
'''
@@ -191,7 +187,7 @@ Source code for molearn.analysis.GUI
crd = self.get_samples(mybox, samplebox, path)
self.samples = crd.copy()
crd = crd.reshape((1, len(crd), 2))
- except:
+ except Exception:
self.button_pdb.disabled = True
return
@@ -208,12 +204,11 @@ Source code for molearn.analysis.GUI
self.mymol.load_new(gen)
view = nv.show_mdanalysis(self.mymol)
view.add_representation("spacefill")
- #view.add_representation("cartoon")
+ # view.add_representation("cartoon")
display.display(view)
self.button_pdb.disabled = False
-
def drop_background_event(self, change):
'''
control colouring style of latent space surface
@@ -225,7 +220,7 @@ Source code for molearn.analysis.GUI
mykey = change.new
try:
- data = self.MA.surfaces[mykey]
+ data = self.MA.surfaces[mykey]
except Exception as e:
print(f"{e}")
return
@@ -243,7 +238,7 @@ Source code for molearn.analysis.GUI
self.latent.data[0].zmax = np.max(data)
self.block0.children[1].min = np.min(data)
self.block0.children[1].max = np.max(data)
- except:
+ except Exception:
self.latent.data[0].zmax = np.max(data)
self.latent.data[0].zmin = np.min(data)
self.block0.children[1].max = np.max(data)
@@ -253,7 +248,6 @@ Source code for molearn.analysis.GUI
self.update_trails()
-
def drop_dataset_event(self, change):
'''
control which dataset is displayed
@@ -265,7 +259,7 @@ Source code for molearn.analysis.GUI
else:
try:
- data = as_numpy(self.MA.get_encoded(change.new).squeeze(2))
+ data = as_numpy(self.MA.get_encoded(change.new).squeeze(2))
except Exception as e:
print(f"{e}")
return
@@ -277,7 +271,6 @@ Source code for molearn.analysis.GUI
self.latent.update()
-
def drop_path_event(self, change):
'''
control way paths are looked for
@@ -290,7 +283,6 @@ Source code for molearn.analysis.GUI
self.update_trails()
-
def range_slider_event(self, change):
'''
update surface colouring upon manipulation of range slider
@@ -300,7 +292,6 @@ Source code for molearn.analysis.GUI
self.latent.data[0].zmax = change.new[1]
self.latent.update()
-
def trail_update_event(self, change):
'''
update trails (waypoints and way they are connected)
@@ -309,7 +300,7 @@ Source code for molearn.analysis.GUI
try:
crd = np.array(self.mybox.value.split()).astype(float)
crd = crd.reshape((int(len(crd)/2), 2))
- except:
+ except Exception:
self.button_pdb.disabled = False
return
@@ -317,7 +308,6 @@ Source code for molearn.analysis.GUI
self.update_trails()
-
def button_pdb_event(self, check):
'''
save PDB file corresponding to the interpolation shown in the 3D view
@@ -346,7 +336,6 @@ Source code for molearn.analysis.GUI
for ts in self.mymol.trajectory:
W.write(protein)
-
def button_save_state_event(self, check):
'''
save class state
@@ -360,8 +349,7 @@ Source code for molearn.analysis.GUI
if fname == "":
return
- pickle.dump([self.MA, self.waypoints], open( fname, "wb" ) )
-
+ pickle.dump([self.MA, self.waypoints], open(fname, "wb"))
def button_load_state_event(self, check):
'''
@@ -377,7 +365,7 @@ Source code for molearn.analysis.GUI
return
try:
- self.MA, self.waypoints = pickle.load( open( fname, "rb" ) )
+ self.MA, self.waypoints = pickle.load(open(fname, "rb"))
self.run()
except Exception as e:
raise Exception(f"Cannot load state file. {e}")
@@ -388,7 +376,7 @@ Source code for molearn.analysis.GUI
# create an MDAnalysis instance of input protein (for viewing purposes)
if hasattr(self.MA, "mol"):
- self.MA.mol.write_pdb("tmp.pdb", conformations=[0], split_struc = False)
+ self.MA.mol.write_pdb("tmp.pdb", conformations=[0], split_struc=False)
self.mymol = mda.Universe('tmp.pdb')
### MENU ITEMS ###
@@ -415,7 +403,6 @@ Source code for molearn.analysis.GUI
self.drop_background.observe(self.drop_background_event, names='value')
-
# dataset selector dropdown menu
options2 = ["none"]
if self.MA is not None:
@@ -445,7 +432,6 @@ Source code for molearn.analysis.GUI
self.drop_path.observe(self.drop_path_event, names='value')
-
# text box holding current coordinates
self.mybox = widgets.Textarea(placeholder='coordinates',
description='crds:',
@@ -460,7 +446,6 @@ Source code for molearn.analysis.GUI
self.samplebox.observe(self.trail_update_event, names='value')
-
# button to save PDB file
self.button_pdb = widgets.Button(
description='Save PDB',
@@ -468,23 +453,20 @@ Source code for molearn.analysis.GUI
self.button_pdb.on_click(self.button_pdb_event)
-
# button to save state file
self.button_save_state = widgets.Button(
- description= 'Save state',
+ description='Save state',
disabled=False, layout=Layout(flex='1 1 0%', width='auto'))
self.button_save_state.on_click(self.button_save_state_event)
-
# button to load state file
self.button_load_state = widgets.Button(
- description= 'Load state',
+ description='Load state',
disabled=False, layout=Layout(flex='1 1 0%', width='auto'))
self.button_load_state.on_click(self.button_load_state_event)
-
# latent space range slider
self.range_slider = widgets.FloatRangeSlider(
description='cmap range:',
@@ -502,8 +484,7 @@ Source code for molearn.analysis.GUI
if self.waypoints == []:
self.button_pdb.disabled = True
-
-
+
### LATENT SPACE REPRESENTATION ###
# surface
@@ -541,7 +522,7 @@ Source code for molearn.analysis.GUI
# path
plot3 = go.Scatter(x=np.array([]), y=np.array([]),
- showlegend=False, opacity=0.9, mode = 'lines+markers',
+ showlegend=False, opacity=0.9, mode='lines+markers',
marker=dict(color='red', size=4))
self.latent = go.FigureWidget([plot1, plot2, plot3])
@@ -560,7 +541,7 @@ Source code for molearn.analysis.GUI
try:
self.range_slider.min = scmin
self.range_slider.max = scmax
- except:
+ except Exception:
self.range_slider.max = scmax
self.range_slider.min = scmin
@@ -569,8 +550,7 @@ Source code for molearn.analysis.GUI
# 3D protein representation (triggered by update of textbox, sampling box, or pathfinding method)
self.protein = widgets.interactive_output(self.interact_3D, {'mybox': self.mybox, 'samplebox': self.samplebox, 'path': self.drop_path})
-
-
+
### WIDGETS ARRANGEMENT ###
self.block0 = widgets.VBox([self.drop_dataset, self.range_slider,
@@ -594,7 +574,7 @@ Source code for molearn.analysis.GUI
display.clear_output(wait=True)
display.display(self.scene)
-
+
@@ -632,8 +612,8 @@ Navigation
\ No newline at end of file
diff --git a/docs/build/_modules/molearn/analysis/analyser.html b/docs/build/_modules/molearn/analysis/analyser.html
index d8d6668..6236727 100644
--- a/docs/build/_modules/molearn/analysis/analyser.html
+++ b/docs/build/_modules/molearn/analysis/analyser.html
@@ -1,18 +1,15 @@
-
-
+
molearn.analysis.analyser — molearn 2.0.1 documentation
-
-
-
-
-
-
-
+
+
+
+
+
@@ -60,7 +57,16 @@ Source code for molearn.analysis.analyser
print('Error importing modeller: ')
print(e)
-from ..scoring import Parallel_DOPE_Score, Parallel_Ramachandran_Score
+try:
+ from ..scoring import Parallel_DOPE_Score
+except ImportError as e:
+ print('Import Error captured while trying to import Parallel_DOPE_Score, it is likely that you dont have Modeller installed')
+ print(e)
+try:
+ from ..scoring import Parallel_Ramachandran_Score
+except ImportError as e:
+ print('Import Error captured while trying to import Parallel_Ramachandran_Score, it is likely that you dont have cctbx/iotbx installed')
+ print(e)
from ..data import PDBData
from ..utils import as_numpy
@@ -69,7 +75,9 @@ Source code for molearn.analysis.analyser
warnings.filterwarnings("ignore")
-[docs]class MolearnAnalysis(object):
+
+[docs]
+class MolearnAnalysis:
'''
This class provides methods dedicated to the quality analysis of a
trained model.
@@ -83,7 +91,9 @@ Source code for molearn.analysis.analyser
self.batch_size = 1
self.processes = 1
-[docs] def set_network(self, network):
+
+[docs]
+ def set_network(self, network):
'''
:param network: a trained neural network defined in :func:`molearn.models <molearn.models>`
'''
@@ -91,13 +101,19 @@ Source code for molearn.analysis.analyser
self.network.eval()
self.device = next(network.parameters()).device
-[docs] def get_dataset(self, key):
+
+
+[docs]
+ def get_dataset(self, key):
'''
:param str key: key pointing to a dataset previously loaded with :func:`set_dataset <molearn.analysis.MolearnAnalysis.set_dataset>`
'''
return self._datasets[key]
-[docs] def set_dataset(self, key, data, atomselect="*"):
+
+
+[docs]
+ def set_dataset(self, key, data, atomselect="*"):
'''
:param data: :func:`PDBData <molearn.data.PDBData>` object containing atomic coordinates
:param str key: label to be associated with data
@@ -127,7 +143,10 @@ Source code for molearn.analysis.analyser
if not hasattr(self, 'shape'):
self.shape = (_data.dataset.shape[1], _data.dataset.shape[2])
-[docs] def get_encoded(self, key):
+
+
+[docs]
+ def get_encoded(self, key):
'''
:param str key: key pointing to a dataset previously loaded with :func:`set_dataset <molearn.analysis.MolearnAnalysis.set_dataset>`
:return: array containing the encoding in latent space of dataset associated with key
@@ -148,13 +167,19 @@ Source code for molearn.analysis.analyser
return self._encoded[key]
-[docs] def set_encoded(self, key, coords):
+
+
+[docs]
+ def set_encoded(self, key, coords):
'''
:param str key: key pointing to a dataset previously loaded with :func:`set_dataset <molearn.analysis.MolearnAnalysis.set_dataset>`
'''
self._encoded[key] = torch.tensor(coords).float()
-[docs] def get_decoded(self, key):
+
+
+[docs]
+ def get_decoded(self, key):
'''
:param str key: key pointing to a dataset previously loaded with :func:`set_dataset <molearn.analysis.MolearnAnalysis.set_dataset>`
'''
@@ -164,23 +189,32 @@ Source code for molearn.analysis.analyser
encoded = self.get_encoded(key)
decoded = torch.empty(encoded.shape[0], *self.shape).float()
for i in tqdm(range(0, encoded.shape[0], batch_size), desc=f'Decoding {key}'):
- decoded[i:i+batch_size] = self.network.decode(encoded[i:i+batch_size].to(self.device))[:,:,:self.shape[1]].cpu()
+ decoded[i:i+batch_size] = self.network.decode(encoded[i:i+batch_size].to(self.device))[:, :, :self.shape[1]].cpu()
self._decoded[key] = decoded
return self._decoded[key]
-[docs] def set_decoded(self, key, structures):
+
+
+[docs]
+ def set_decoded(self, key, structures):
'''
:param str key: key pointing to a dataset previously loaded with :func:`set_dataset <molearn.analysis.MolearnAnalysis.set_dataset>`
'''
self._decoded[key] = structures
-[docs] def num_trainable_params(self):
+
+
+[docs]
+ def num_trainable_params(self):
'''
:return: number of trainable parameters in the neural network previously loaded with :func:`set_dataset <molearn.analysis.MolearnAnalysis.set_network>`
'''
return sum(p.numel() for p in self.network.parameters() if p.requires_grad)
-[docs] def get_error(self, key, align=False):
+
+
+[docs]
+ def get_error(self, key, align=True):
'''
Calculate the reconstruction error of a dataset encoded and decoded by a trained neural network.
@@ -196,21 +230,24 @@ Source code for molearn.analysis.analyser
m = deepcopy(self.mol)
for i in range(dataset.shape[0]):
crd_ref = as_numpy(dataset[i].permute(1,0).unsqueeze(0))*self.stdval + self.meanval
- crd_mdl = as_numpy(decoded[i].permute(1,0).unsqueeze(0))[:, :dataset.shape[2]]*self.stdval + self.meanval #clip the padding of models
- if align: # use Molecule Biobox class to calculate RMSD
+ crd_mdl = as_numpy(decoded[i].permute(1,0).unsqueeze(0))[:, :dataset.shape[2]]*self.stdval + self.meanval # clip the padding of models
+ # use Molecule Biobox class to calculate RMSD
+ if align:
m.coordinates = deepcopy(crd_ref)
m.set_current(0)
m.add_xyz(crd_mdl[0])
rmsd = m.rmsd(0, 1)
else:
- rmsd = np.sqrt(np.sum((crd_ref.flatten()-crd_mdl.flatten())**2)/crd_mdl.shape[1]) # Cartesian L2 norm
+ rmsd = np.sqrt(np.sum((crd_ref.flatten()-crd_mdl.flatten())**2)/crd_mdl.shape[1]) # Cartesian L2 norm
err.append(rmsd)
return np.array(err)
-[docs] def get_dope(self, key, refine=True, **kwargs):
+
+[docs]
+ def get_dope(self, key, refine=True, **kwargs):
'''
:param str key: key pointing to a dataset previously loaded with :func:`set_dataset <molearn.analysis.MolearnAnalysis.set_dataset>`
:param bool refine: if True, refine structures before calculating DOPE score
@@ -219,13 +256,16 @@ Source code for molearn.analysis.analyser
dataset = self.get_dataset(key)
decoded = self.get_decoded(key)
- dope_dataset = self.get_all_dope_score(dataset, refine=refine,**kwargs)
- dope_decoded = self.get_all_dope_score(decoded, refine=refine,**kwargs)
+ dope_dataset = self.get_all_dope_score(dataset, refine=refine, **kwargs)
+ dope_decoded = self.get_all_dope_score(decoded, refine=refine, **kwargs)
+
+ return dict(dataset_dope=dope_dataset,
+ decoded_dope=dope_decoded)
- return dict(dataset_dope = dope_dataset,
- decoded_dope = dope_decoded)
-[docs] def get_ramachandran(self, key):
+
+[docs]
+ def get_ramachandran(self, key):
'''
:param str key: key pointing to a dataset previously loaded with :func:`set_dataset <molearn.analysis.MolearnAnalysis.set_dataset>`
'''
@@ -237,7 +277,10 @@ Source code for molearn.analysis.analyser
ramachandran.update({f'decoded_{key}':value for key, value in self.get_all_ramachandran_score(decoded).items()})
return ramachandran
-[docs] def setup_grid(self, samples=64, bounds_from=None, bounds=None, padding=0.1):
+
+
+[docs]
+ def setup_grid(self, samples=64, bounds_from=None, bounds=None, padding=0.1):
'''
Define a NxN point grid regularly sampling the latent space.
@@ -252,7 +295,7 @@ Source code for molearn.analysis.analyser
if bounds_from is None:
bounds_from = "all"
- bounds = self._get_bounds(bounds_from, exclude = key)
+ bounds = self._get_bounds(bounds_from, exclude=key)
bx = (bounds[1]-bounds[0])*padding
by = (bounds[3]-bounds[2])*padding
@@ -260,12 +303,13 @@ Source code for molearn.analysis.analyser
self.yvals = np.linspace(bounds[2]-by, bounds[3]+by, samples)
self.n_samples = samples
meshgrid = np.meshgrid(self.xvals, self.yvals)
- stack = np.stack(meshgrid, axis=2).reshape(-1,1,2)
+ stack = np.stack(meshgrid, axis=2).reshape(-1, 1, 2)
self.set_encoded(key, stack)
return key
- def _get_bounds(self, bounds_from, exclude = ['grid', 'grid_decoded']):
+
+ def _get_bounds(self, bounds_from, exclude=['grid', 'grid_decoded']):
'''
:param bounds_from: keys of datasets to be considered for identification of boundaries in latent space
:param exclude: keys of dataset not to consider
@@ -282,16 +326,18 @@ Source code for molearn.analysis.analyser
xmin, ymin, xmax, ymax = [], [], [], []
for key in bounds_from:
z = self.get_encoded(key)
- xmin.append(z[:,0].min())
- ymin.append(z[:,1].min())
- xmax.append(z[:,0].max())
- ymax.append(z[:,1].max())
+ xmin.append(z[:, 0].min())
+ ymin.append(z[:, 1].min())
+ xmax.append(z[:, 0].max())
+ ymax.append(z[:, 1].max())
xmin, ymin = min(xmin), min(ymin)
xmax, ymax = max(xmax), max(ymax)
return xmin, xmax, ymin, ymax
-[docs] def scan_error_from_target(self, key, index=None, align=False):
+
+[docs]
+ def scan_error_from_target(self, key, index=None, align=True):
'''
Calculate landscape of RMSD vs single target structure. Target should be previously loaded datset containing a single conformation.
@@ -315,19 +361,22 @@ Source code for molearn.analysis.analyser
decoded = self.get_decoded('grid')
if align:
- crd_ref = as_numpy(target.permute(0,2,1))*self.stdval
- crd_mdl = as_numpy(decoded.permute(0,2,1))*self.stdval
+ crd_ref = as_numpy(target.permute(0, 2, 1))*self.stdval
+ crd_mdl = as_numpy(decoded.permute(0, 2, 1))*self.stdval
m = deepcopy(self.mol)
m.coordinates = np.concatenate([crd_ref, crd_mdl])
m.set_current(0)
- rmsd = np.array([m.rmsd(0,i) for i in range(1, len(m.coordinates))])
+ rmsd = np.array([m.rmsd(0, i) for i in range(1, len(m.coordinates))])
else:
rmsd = (((decoded-target)*self.stdval)**2).sum(axis=1).mean(axis=-1).sqrt()
- self.surfaces[s_key] = rmsd.reshape(self.n_samples, self.n_samples).numpy()
+ self.surfaces[s_key] = as_numpy(rmsd.reshape(self.n_samples, self.n_samples))
return self.surfaces[s_key], self.xvals, self.yvals
-[docs] def scan_error(self, s_key='Network_RMSD', z_key='Network_z_drift'):
+
+
+[docs]
+ def scan_error(self, s_key='Network_RMSD', z_key='Network_z_drift'):
'''
Calculate RMSD and z-drift on a grid sampling the latent space.
Requires a grid system to be defined via a prior call to :func:`set_dataset <molearn.analysis.MolearnAnalysis.setup_grid>`.
@@ -343,30 +392,32 @@ Source code for molearn.analysis.analyser
z_key = 'Network_z_drift'
if s_key not in self.surfaces:
assert 'grid' in self._encoded, 'make sure to call MolearnAnalysis.setup_grid first'
- decoded = self.get_decoded('grid') # decode grid
- #self.set_dataset('grid_decoded', decoded) # add back as dataset w. different name
+ decoded = self.get_decoded('grid') # decode grid
+ # self.set_dataset('grid_decoded', decoded) # add back as dataset w. different name
self._datasets['grid_decoded'] = decoded
- decoded_2 = self.get_decoded('grid_decoded') # encode, and decode a second time
- grid = self.get_encoded('grid') # retrieve original grid
- grid_2 = self.get_encoded('grid_decoded') # retrieve decoded encoded grid
+ decoded_2 = self.get_decoded('grid_decoded') # encode, and decode a second time
+ grid = self.get_encoded('grid') # retrieve original grid
+ grid_2 = self.get_encoded('grid_decoded') # retrieve decoded encoded grid
rmsd = (((decoded-decoded_2)*self.stdval)**2).sum(axis=1).mean(axis=-1).sqrt()
z_drift = ((grid-grid_2)**2).mean(axis=2).mean(axis=1).sqrt()
self.surfaces[s_key] = rmsd.reshape(self.n_samples, self.n_samples).numpy()
self.surfaces[z_key] = z_drift.reshape(self.n_samples, self.n_samples).numpy()
+
return self.surfaces[s_key], self.surfaces[z_key], self.xvals, self.yvals
+
def _ramachandran_score(self, frame):
'''
returns multiprocessing AsyncResult
AsyncResult.get() will return the result
'''
if not hasattr(self, 'ramachandran_score_class'):
- self.ramachandran_score_class = Parallel_Ramachandran_Score(self.mol, self.processes) #Parallel_Ramachandran_Score(self.mol)
+ self.ramachandran_score_class = Parallel_Ramachandran_Score(self.mol, self.processes)
assert len(frame.shape) == 2, f'We wanted 2D data but got {len(frame.shape)} dimensions'
if frame.shape[0] == 3:
- f = frame.permute(1,0)
+ f = frame.permute(1, 0)
else:
assert frame.shape[1] == 3
f = frame
@@ -374,9 +425,8 @@ Source code for molearn.analysis.analyser
f = f.data.cpu().numpy()
return self.ramachandran_score_class.get_score(f*self.stdval)
- #nf, na, no, nt = self.ramachandran_score_class.get_score(f*self.stdval)
- #return {'favored':nf, 'allowed':na, 'outliers':no, 'total':nt}
-
+ # nf, na, no, nt = self.ramachandran_score_class.get_score(f*self.stdval)
+ # return {'favored':nf, 'allowed':na, 'outliers':no, 'total':nt}
def _dope_score(self, frame, refine=True, **kwargs):
'''
@@ -388,16 +438,18 @@ Source code for molearn.analysis.analyser
assert len(frame.shape) == 2, f'We wanted 2D data but got {len(frame.shape)} dimensions'
if frame.shape[0] == 3:
- f = frame.permute(1,0)
+ f = frame.permute(1, 0)
else:
- assert frame.shape[1] ==3
+ assert frame.shape[1] == 3
f = frame
if isinstance(f,torch.Tensor):
f = f.data.cpu().numpy()
return self.dope_score_class.get_score(f*self.stdval, refine=refine, **kwargs)
-[docs] def get_all_ramachandran_score(self, tensor):
+
+[docs]
+ def get_all_ramachandran_score(self, tensor):
'''
Calculate Ramachandran score of an ensemble of atomic conrdinates.
@@ -407,7 +459,7 @@ Source code for molearn.analysis.analyser
results = []
for f in tensor:
results.append(self._ramachandran_score(f))
- for r in tqdm(results,desc=f'Calc rama'):
+ for r in tqdm(results,desc='Calc rama'):
favored, allowed, outliers, total = r.get()
rama['favored'].append(favored)
rama['allowed'].append(allowed)
@@ -415,7 +467,10 @@ Source code for molearn.analysis.analyser
rama['total'].append(total)
return {key:np.array(value) for key, value in rama.items()}
-[docs] def get_all_dope_score(self, tensor, refine=True):
+
+
+[docs]
+ def get_all_dope_score(self, tensor, refine=True):
'''
Calculate DOPE score of an ensemble of atom coordinates.
@@ -425,16 +480,19 @@ Source code for molearn.analysis.analyser
results = []
for f in tensor:
results.append(self._dope_score(f, refine=refine))
- results = np.array([r.get() for r in tqdm(results, desc=f'Calc Dope')])
+ results = np.array([r.get() for r in tqdm(results, desc='Calc Dope')])
return results
-[docs] def reference_dope_score(self, frame):
+
+
+[docs]
+ def reference_dope_score(self, frame):
'''
:param numpy.array frame: array with shape [1, N, 3] with Cartesian coordinates of atoms
:return: DOPE score
'''
self.mol.coordinates = deepcopy(frame)
- self.mol.write_pdb('tmp.pdb', split_struc = False)
+ self.mol.write_pdb('tmp.pdb', split_struc=False)
env = Environ()
env.libs.topology.read(file='$(LIB)/top_heav.lib')
env.libs.parameters.read(file='$(LIB)/par.lib')
@@ -443,7 +501,10 @@ Source code for molearn.analysis.analyser
score = atmsel.assess_dope()
return score
-[docs] def scan_dope(self, key=None, refine=True, **kwargs):
+
+
+[docs]
+ def scan_dope(self, key=None, refine=True, **kwargs):
'''
Calculate DOPE score on a grid sampling the latent space.
Requires a grid system to be defined via a prior call to :func:`set_dataset <molearn.analysis.MolearnAnalysis.setup_grid>`.
@@ -468,13 +529,16 @@ Source code for molearn.analysis.analyser
decoded = self.get_decoded('grid')
result = self.get_all_dope_score(decoded, refine=refine, **kwargs)
if refine=='both':
- self.surfaces[key] = as_numpy(result.reshape(self.n_samples, self.n_samples,2))
+ self.surfaces[key] = as_numpy(result.reshape(self.n_samples, self.n_samples, 2))
else:
self.surfaces[key] = as_numpy(result.reshape(self.n_samples, self.n_samples))
return self.surfaces[key], self.xvals, self.yvals
-[docs] def scan_ramachandran(self):
+
+
+[docs]
+ def scan_ramachandran(self):
'''
Calculate Ramachandran scores on a grid sampling the latent space.
Requires a grid system to be defined via a prior call to :func:`set_dataset <molearn.analysis.MolearnAnalysis.setup_grid>`.
@@ -493,8 +557,11 @@ Source code for molearn.analysis.analyser
self.surfaces[keys[key]] = value
return self.surfaces['Ramachandran_favored'], self.xvals, self.yvals
+
-[docs] def scan_custom(self, fct, params, key):
+
+[docs]
+ def scan_custom(self, fct, params, key):
'''
Generate a surface coloured as a function of a user-defined function.
@@ -507,14 +574,17 @@ Source code for molearn.analysis.analyser
'''
decoded = self.get_decoded('grid')
results = []
- for i,j in enumerate(decoded):
- s = (j.view(1,3,-1).permute(0,2,1)*self.stdval).numpy()
+ for i, j in enumerate(decoded):
+ s = (j.view(1, 3, -1).permute(0, 2, 1)*self.stdval).numpy()
results.append(fct(s, *params))
self.surfaces[key] = np.array(results).reshape(self.n_samples, self.n_samples)
return self.surfaces[key], self.xvals, self.yvals
-[docs] def generate(self, crd):
+
+
+[docs]
+ def generate(self, crd):
'''
Generate a collection of protein conformations, given coordinates in the latent space.
@@ -569,8 +639,8 @@ Navigation
\ No newline at end of file
diff --git a/docs/build/_modules/molearn/analysis/path.html b/docs/build/_modules/molearn/analysis/path.html
index f1342c8..2516674 100644
--- a/docs/build/_modules/molearn/analysis/path.html
+++ b/docs/build/_modules/molearn/analysis/path.html
@@ -1,18 +1,15 @@
-
-
+
molearn.analysis.path — molearn 2.0.1 documentation
-
-
-
-
-
-
-
+
+
+
+
+
@@ -45,7 +42,8 @@ Source code for molearn.analysis.path
:synopsis: Tools for linking waypoints with paths in latent space
"""
-class PriorityQueue(object):
+
+class PriorityQueue:
'''
Queue for shortest path algorithms.
@@ -112,7 +110,7 @@ Source code for molearn.analysis.path
idx = np.unravel_index(idx, gridshape)
elif len(idx) != 2:
raise Exception("Expecting 2D coordinates")
- except:
+ except Exception:
raise Exception("idx should be either integer or an iterable")
# generate neighbour list
@@ -142,6 +140,7 @@ Source code for molearn.analysis.path
'''
:return: scalar value, reporting on the cost of moving onto a grid cell
'''
+
# separate function for clarity, and in case in the future we want to alter this
return graph[pt]
@@ -192,7 +191,9 @@ Source code for molearn.analysis.path
return came_from, cost_so_far
-[docs]def get_path(idx_start, idx_end, landscape, xvals, yvals, smooth=3):
+
+[docs]
+def get_path(idx_start, idx_end, landscape, xvals, yvals, smooth=3):
'''
Find shortest path between two points on a weighted grid
@@ -216,7 +217,9 @@ Source code for molearn.analysis.path
coords = []
score = []
idx_flat = np.ravel_multi_index(idx_end, landscape.shape)
- while cnt<1000: #safeguad for (unlikely) unfinished paths
+
+ # safeguard for (unlikely) unfinished paths
+ while cnt<1000:
if idx_flat == mypath[idx_flat]:
break
@@ -258,7 +261,9 @@ Source code for molearn.analysis.path
return np.array([my_x, my_y])
-[docs]def get_path_aggregate(crd, landscape, xvals, yvals, input_is_index=False):
+
+[docs]
+def get_path_aggregate(crd, landscape, xvals, yvals, input_is_index=False):
'''
Create a chain of shortest paths via give waypoints
@@ -290,7 +295,10 @@ Source code for molearn.analysis.path
return crd
-[docs]def oversample(crd, pts=10):
+
+
+[docs]
+def oversample(crd, pts=10):
'''
Add extra equally spaced points between a list of points.
@@ -298,6 +306,7 @@ Source code for molearn.analysis.path
:param int pts: number of extra points to add in each interval
:return: Mx2 numpy array, with M>=N.
'''
+
pts += 1
steps = np.linspace(1./pts, 1, pts)
pts = [crd[0]]
@@ -307,6 +316,7 @@ Source code for molearn.analysis.path
pts.append(newpt)
return np.array(pts)
+
@@ -344,8 +354,8 @@ Navigation
\ No newline at end of file
diff --git a/docs/build/_modules/molearn/data/pdb_data.html b/docs/build/_modules/molearn/data/pdb_data.html
index 248a579..1e1a70d 100644
--- a/docs/build/_modules/molearn/data/pdb_data.html
+++ b/docs/build/_modules/molearn/data/pdb_data.html
@@ -1,18 +1,15 @@
-
-
+
molearn.data.pdb_data — molearn 2.0.1 documentation
-
-
-
-
-
-
-
+
+
+
+
+
@@ -42,9 +39,12 @@ Source code for molearn.data.pdb_data
from copy import deepcopy
import biobox as bb
-[docs]class PDBData:
+
+
+[docs]
+class PDBData:
- def __init__(self, filename = None, fix_terminal = False, atoms = None, ):
+ def __init__(self, filename=None, fix_terminal=False, atoms=None):
'''
Create object enabling the manipulation of multi-PDB files into a dataset suitable for training.
@@ -61,9 +61,11 @@ Source code for molearn.data.pdb_data
if fix_terminal:
self.fix_terminal()
if atoms is not None:
- self.atomselect(atoms = atoms)
+ self.atomselect(atoms=atoms)
-[docs] def import_pdb(self, filename):
+
+[docs]
+ def import_pdb(self, filename):
'''
Load multiPDB file.
This command can be called multiple times to load many datasets, if these feature the same number of atoms
@@ -77,7 +79,10 @@ Source code for molearn.data.pdb_data
self.filename = []
self.filename.append(filename)
-[docs] def fix_terminal(self):
+
+
+[docs]
+ def fix_terminal(self):
'''
Rename OT1 N-terminal Oxygen to O if terminal oxygens are named OT1 and OT2 otherwise no oxygen will be selected during an atomselect using atoms = ['CA', 'C','N','O','CB']. No template will be found for terminal residue in openmm_loss. Alternative solution is to use atoms = ['CA', 'C', 'N', 'O', 'CB', 'OT1']. instead.
'''
@@ -86,7 +91,10 @@ Source code for molearn.data.pdb_data
if len(ot1)!=0 and len(ot2)!=0:
self._mol.data.loc[ot1,'name']='O'
-[docs] def atomselect(self, atoms, ignore_atoms=[]):
+
+
+[docs]
+ def atomselect(self, atoms, ignore_atoms=[]):
'''
From all imported PDBs, extract only atoms of interest.
:func:`import_pdb <molearn.data.PDBData.import_pdb>` must have been called at least once, either at class instantiation or as a separate call.
@@ -99,15 +107,19 @@ Source code for molearn.data.pdb_data
if to_remove in _atoms:
_atoms.remove(to_remove)
elif atoms == "no_hydrogen":
- _atoms = self.atoms #list(np.unique(self._mol.data["name"].values)) #all the atoms
+ _atoms = self.atoms # list(np.unique(self._mol.data["name"].values)) #all the atoms
_plain_atoms = []
for a in _atoms:
if a in self._mol.knowledge['atomtype']:
_plain_atoms.append(self._mol.knowledge['atomtype'][a])
elif a[:-1] in self._mol.knowledge['atomtype']:
_plain_atoms.append(self._mol.knowledge['atomtype'][a[:-1]])
+ print(f'Could not find {a}. I am assuing you meant {a[:-1]} instead.')
+ elif a[:-2] in self._mol.knowledge['atomtype']:
+ _plain_atoms.append(self._mol.knowledge['atomtype'][a[:-2]])
+ print(f'Could not find {a}. I am assuming you meant {a[:-2]} instead.')
else:
- _plain_atoms.append(self._mol.knowledge['atomtype'][a]) # if above failed just raise the keyerror
+ _plain_atoms.append(self._mol.knowledge['atomtype'][a]) # if above failed just raise the keyerror
_atoms = [atom for atom, element in zip(_atoms, _plain_atoms) if element != 'H']
else:
_atoms = [_a for _a in atoms if _a not in ignore_atoms]
@@ -115,7 +127,10 @@ Source code for molearn.data.pdb_data
_, self._idxs = self._mol.atomselect("*", "*", _atoms, get_index=True)
self._mol = self._mol.get_subset(self._idxs)
-[docs] def prepare_dataset(self):
+
+
+[docs]
+ def prepare_dataset(self):
'''
Once all datasets have been loaded, normalise data and convert into `torch.Tensor` (ready for training)
'''
@@ -134,7 +149,10 @@ Source code for molearn.data.pdb_data
print(f'Dataset.shape: {self.dataset.shape}')
print(f'mean: {str(self.mean)}, std: {str(self.std)}')
-[docs] def get_atominfo(self):
+
+
+[docs]
+ def get_atominfo(self):
'''
generate list of all atoms in dataset, where every line contains [atom name, residue name, resid]
'''
@@ -143,7 +161,10 @@ Source code for molearn.data.pdb_data
self.atominfo = self._mol.get_data(columns=['name', 'resname', 'resid'])
return self.atominfo
-[docs] def frame(self):
+
+
+[docs]
+ def frame(self):
'''
return `biobox.Molecule` object with loaded data
'''
@@ -156,7 +177,10 @@ Source code for molearn.data.pdb_data
M.properties['center'] = M.get_center()
return deepcopy(M)
-[docs] def get_dataloader(self, batch_size, validation_split=0.1, pin_memory=True, dataset_sample_size=-1, manual_seed=None, shuffle=True, sampler=None):
+
+
+[docs]
+ def get_dataloader(self, batch_size, validation_split=0.1, pin_memory=True, dataset_sample_size=-1, manual_seed=None, shuffle=True, sampler=None):
'''
:param batch_size:
:param validation_split:
@@ -182,8 +206,11 @@ Source code for molearn.data.pdb_data
self.train_dataloader = torch.utils.data.DataLoader(self.train_dataset, batch_size=batch_size, pin_memory=pin_memory, shuffle=True)
self.valid_dataloader = torch.utils.data.DataLoader(self.valid_dataset, batch_size=batch_size, pin_memory=pin_memory,shuffle=True)
return self.train_dataloader, self.valid_dataloader
+
-[docs] def split(self, *args, **kwargs):
+
+[docs]
+ def split(self, *args, **kwargs):
'''
Split :func:`PDBData <molearn.data.PDBData>` into two other :func:`PDBData <molearn.data.PDBData>` objects corresponding to train and valid sets.
@@ -194,7 +221,7 @@ Source code for molearn.data.pdb_data
:return: :func:`PDBData <molearn.data.PDBData>` object corresponding to train set
:return: :func:`PDBData <molearn.data.PDBData>` object corresponding to validation set
'''
- #validation_split=0.1, valid_size=None, train_size=None, manual_seed = None):
+ # validation_split=0.1, valid_size=None, train_size=None, manual_seed = None):
train_dataset, valid_dataset = self.get_datasets(*args, **kwargs)
train = PDBData()
valid = PDBData()
@@ -205,7 +232,10 @@ Source code for molearn.data.pdb_data
valid.dataset = valid_dataset
return train, valid
-[docs] def get_datasets(self, validation_split=0.1, valid_size=None, train_size=None, manual_seed = None):
+
+
+[docs]
+ def get_datasets(self, validation_split=0.1, valid_size=None, train_size=None, manual_seed=None):
'''
Create a training and validation set from the imported data
@@ -229,7 +259,7 @@ Source code for molearn.data.pdb_data
_valid_size = valid_size
from torch import randperm
if manual_seed is not None:
- indices = randperm(len(self.dataset), generator = torch.Generator().manual_seed(manual_seed))
+ indices = randperm(len(self.dataset), generator=torch.Generator().manual_seed(manual_seed))
else:
indices = randperm(len(self.dataset))
@@ -238,16 +268,14 @@ Source code for molearn.data.pdb_data
valid_dataset = dataset[indices[_train_size:_train_size+_valid_size]]
return train_dataset, valid_dataset
+
@property
def atoms(self):
- return list(np.unique(self._mol.data["name"].values)) #all the atoms
+ return list(np.unique(self._mol.data["name"].values)) # all the atoms
@property
def mol(self):
return self.frame()
-
-
-
@@ -286,8 +314,8 @@ Navigation
\ No newline at end of file
diff --git a/docs/build/_modules/molearn/loss_functions/openmm_thread.html b/docs/build/_modules/molearn/loss_functions/openmm_thread.html
index ddff98f..ac7c462 100644
--- a/docs/build/_modules/molearn/loss_functions/openmm_thread.html
+++ b/docs/build/_modules/molearn/loss_functions/openmm_thread.html
@@ -1,18 +1,15 @@
-
-
+
molearn.loss_functions.openmm_thread — molearn 2.0.1 documentation
-
-
-
-
-
-
-
+
+
+
+
+
@@ -53,11 +50,14 @@ Source code for molearn.loss_functions.openmm_thread
import torch
import numpy as np
+from copy import deepcopy
-[docs]class ModifiedForceField(ForceField):
+
+[docs]
+class ModifiedForceField(ForceField):
- def __init__(self, *args, alternative_residue_names = None, **kwargs):
+ def __init__(self, *args, alternative_residue_names=None, **kwargs):
'''
Takes all `*args` and `**kwargs` of `openmm.app.ForceField`, plus an optional parameter described here.
@@ -119,7 +119,7 @@ Source code for molearn.loss_functions.openmm_thread
matches = m
return [template, matches]
print(f'multiple for {t.name}')
- # We found multiple matches. This is OK if and only if they assign identical types and parameters to all atoms.
+ # We found multiple matches. This is OK if and only if they assign identical types and parameters to all atoms.
t1, m1 = allMatches[0]
for t2, m2 in allMatches[1:]:
@@ -129,15 +129,19 @@ Source code for molearn.loss_functions.openmm_thread
matches = allMatches[0][1]
return [template, matches]
-[docs]class OpenmmPluginScore():
+
+
+
+[docs]
+class OpenmmPluginScore():
'''
This will use the new OpenMM Plugin to calculate forces and energy. The intention is that this will be fast enough to be able to calculate forces and energy during training.
N.B.: The current torchintegratorplugin only supports float on GPU and double on CPU.
'''
- def __init__(self, mol=None, xml_file = ['amber14-all.xml'], platform = 'CUDA', remove_NB=False,
- alternative_residue_names = dict(HIS='HIE', HSE='HIE'), atoms=['CA', 'C', 'N', 'CB','O'],
- soft=False):
+ def __init__(self, mol=None, xml_file=['amber14-all.xml'], platform='CUDA', remove_NB=False,
+ alternative_residue_names=dict(HIS='HIE', HSE='HIE'), atoms=['CA', 'C', 'N', 'CB','O'],
+ soft=False):
'''
:param `biobox.Molecule` mol: if pldataloader is not given, then a biobox object will be taken from this parameter. If neither are given then an error will be thrown.
:param str xml_file: xml parameter file
@@ -149,12 +153,12 @@ Source code for molearn.loss_functions.openmm_thread
'''
self.mol = mol
for key, value in alternative_residue_names.items():
- #self.mol.data.loc[:,'resname'][self.mol.data['resname']==key]=value
+ # self.mol.data.loc[:,'resname'][self.mol.data['resname']==key]=value
self.mol.data.loc[self.mol.data['resname']==key,'resname']=value
- #self.mol.data.loc[lambda df: df['resname']==key, key]=value
+ # self.mol.data.loc[lambda df: df['resname']==key, key]=value
tmp_file = f'tmp{np.random.randint(1e10)}.pdb'
self.atoms = atoms
- self.mol.write_pdb(tmp_file, split_struc = False)
+ self.mol.write_pdb(tmp_file, split_struc=False)
self.pdb = PDBFile(tmp_file)
if soft:
print('attempting soft forcefield')
@@ -164,9 +168,9 @@ Source code for molearn.loss_functions.openmm_thread
self.system = self.forcefield.createSystem(self.pdb.topology)
else:
if isinstance(xml_file,str):
- self.forcefield = ModifiedForceField(xml_file, alternative_residue_names = alternative_residue_names)
+ self.forcefield = ModifiedForceField(xml_file, alternative_residue_names=alternative_residue_names)
elif len(xml_file)>0:
- self.forcefield = ModifiedForceField(*xml_file, alternative_residue_names = alternative_residue_names)
+ self.forcefield = ModifiedForceField(*xml_file, alternative_residue_names=alternative_residue_names)
else:
raise ValueError(f'xml_file: {xml_file} needs to be a str or a list of str')
@@ -174,14 +178,14 @@ Source code for molearn.loss_functions.openmm_thread
self.ignore_hydrogen()
else:
self.atomselect(atoms)
- #save pdb and reload in modeller
+ # save pdb and reload in modeller
templates, unique_unmatched_residues = self.forcefield.generateTemplatesForUnmatchedResidues(self.pdb.topology)
self.system = self.forcefield.createSystem(self.pdb.topology)
if remove_NB:
forces = self.system.getForces()
for idx in reversed(range(len(forces))):
force = forces[idx]
- if isinstance(force, (#openmm.PeriodicTorsionForce,
+ if isinstance(force, ( # openmm.PeriodicTorsionForce,
openmm.CustomGBForce,
openmm.NonbondedForce,
openmm.CMMotionRemover)):
@@ -193,7 +197,6 @@ Source code for molearn.loss_functions.openmm_thread
if isinstance(force, openmm.CustomGBForce):
self.system.removeForce(idx)
-
self.integrator = TorchExposedIntegrator()
self.platform = Platform.getPlatformByName(platform)
self.simulation = Simulation(self.pdb.topology, self.system, self.integrator, self.platform)
@@ -206,7 +209,7 @@ Source code for molearn.loss_functions.openmm_thread
os.remove(tmp_file)
def ignore_hydrogen(self):
- #ignore = ['ASH', 'LYN', 'GLH', 'HID', 'HIP', 'CYM', ]
+ # ignore = ['ASH', 'LYN', 'GLH', 'HID', 'HIP', 'CYM', ]
ignore = []
for name, template in self.forcefield._templates.items():
if name in ignore:
@@ -232,6 +235,12 @@ Source code for molearn.loss_functions.openmm_thread
self.forcefield.registerPatch(patchData)
def atomselect(self, atoms):
+ atoms = deepcopy(atoms)
+ if 'OT2' in atoms:
+ atoms.append('OXT')
+ if 'OT1' in atoms:
+ atoms.append('OXT')
+
for name, template in self.forcefield._templates.items():
patchData = ForceField._PatchData(name+'_leave_only_'+'_'.join(atoms), 1)
@@ -254,8 +263,9 @@ Source code for molearn.loss_functions.openmm_thread
self.forcefield.registerTemplatePatch(name, name+'_leave_only_'+'_'.join(atoms), 0)
self.forcefield.registerPatch(patchData)
-
-[docs] def get_energy(self, pos_ptr, force_ptr, energy_ptr, n_particles, batch_size):
+
+[docs]
+ def get_energy(self, pos_ptr, force_ptr, energy_ptr, n_particles, batch_size):
'''
:param pos_ptr: tensor.data_ptr()
:param force_ptr: tensor.data_ptr()
@@ -268,17 +278,25 @@ Source code for molearn.loss_functions.openmm_thread
self.integrator.torchMultiStructureE(pos_ptr, force_ptr, energy_ptr, n_particles, batch_size)
return True
-[docs] def execute(self, x):
+
+
+[docs]
+ def execute(self, x):
'''
:param `torch.Tensor` x: shape [b, N, 3]. dtype=float. device = gpu
'''
force = torch.zeros_like(x)
- energy = torch.zeros(x.shape[0], device = torch.device('cpu'), dtype=torch.double)
+ energy = torch.zeros(x.shape[0], device=torch.device('cpu'), dtype=torch.double)
self.get_energy(x.data_ptr(), force.data_ptr(), energy.data_ptr(), x.shape[1], x.shape[0])
- return force, energy
+ return force, energy
+
+
-[docs]class OpenmmTorchEnergyMinimizer(OpenmmPluginScore):
+
+[docs]
+class OpenmmTorchEnergyMinimizer(OpenmmPluginScore):
+
def minimize(self, x, maxIterations=10, threshold=10000):
minimized_x = torch.empty_like(x)
for i,s in enumerate(x.unsqueeze(1)):
@@ -301,12 +319,15 @@ Source code for molearn.loss_functions.openmm_thread
[docs]class OpenMMPluginScoreSoftForceField(OpenmmPluginScore):
+
+[docs]
+class OpenMMPluginScoreSoftForceField(OpenmmPluginScore):
+
def __init__(self, mol=None, platform='CUDA', atoms=['CA','C','N','CB','O']):
self.mol = mol
tmp_file = 'tmp.pdb'
self.atoms = atoms
- self.mol.write_pdb(tmp_file, split_struc = False)
+ self.mol.write_pdb(tmp_file, split_struc=False)
self.pdb = PDBFile(tmp_file)
from pdbfixer import PDBFixer
f = PDBFixer(tmp_file)
@@ -323,9 +344,14 @@ Source code for molearn.loss_functions.openmm_thread
print(self.simulation.context.getState(getEnergy=True).getPotentialEnergy()._value)
-[docs]class openmm_energy_function(torch.autograd.Function):
-[docs] @staticmethod
+
+[docs]
+class openmm_energy_function(torch.autograd.Function):
+
+
+[docs]
+ @staticmethod
def forward(ctx, plugin, x):
'''
:param plugin: OpenmmPluginScore instance
@@ -343,22 +369,32 @@ Source code for molearn.loss_functions.openmm_thread
force = torch.tensor(force).float()
energy = torch.tensor(energy).float()
else:
- #torch.cuda.synchronize(x.device)
+ # torch.cuda.synchronize(x.device)
force, energy = plugin.execute(x)
- #torch.cuda.synchronize(x.device)
+ # torch.cuda.synchronize(x.device)
ctx.save_for_backward(force)
energy = energy.float().to(x.device)
return energy
-[docs] @staticmethod
+
+
+[docs]
+ @staticmethod
def backward(ctx, grad_output):
- force = ctx.saved_tensors[0] # force shape [B, N, 3]
- #embed(header='23 openmm_loss_function')
- return None, -force*grad_output.view(-1,1,1)
+ force = ctx.saved_tensors[0] # force shape [B, N, 3]
+ # embed(header='23 openmm_loss_function')
+ return None, -force*grad_output.view(-1,1,1)
+
+
+
-[docs]class openmm_clamped_energy_function(torch.autograd.Function):
+
+[docs]
+class openmm_clamped_energy_function(torch.autograd.Function):
-[docs] @staticmethod
+
+[docs]
+ @staticmethod
def forward(ctx, plugin, x, clamp):
'''
:param plugin: OpenmmPluginScore instance
@@ -383,13 +419,22 @@ Source code for molearn.loss_functions.openmm_thread
energy = energy.float().to(x.device)
return energy
-[docs] @staticmethod
+
+
+[docs]
+ @staticmethod
def backward(ctx, grad_output):
force = ctx.saved_tensors[0]
- return None, -force*grad_output.view(-1,1,1), None
+ return None, -force*grad_output.view(-1, 1, 1), None
+
+
-[docs]class openmm_energy(torch.nn.Module):
- def __init__(self, mol, std, clamp = None, **kwargs):
+
+
+[docs]
+class openmm_energy(torch.nn.Module):
+
+ def __init__(self, mol, std, clamp=None, **kwargs):
super().__init__()
self.openmmplugin = OpenmmPluginScore(mol, **kwargs)
self.std = std/10
@@ -404,7 +449,7 @@ Source code for molearn.loss_functions.openmm_thread
:param `torch.Tensor` x: dtype=torch.float, device=CUDA, shape B, 3, N
:returns: torch energy tensor dtype should be float and on same device as x
'''
- _x = (x*self.std).permute(0,2,1).contiguous()
+ _x = (x*self.std).permute(0, 2, 1).contiguous()
energy = openmm_energy_function.apply(self.openmmplugin, _x)
return energy
@@ -413,7 +458,7 @@ Source code for molearn.loss_functions.openmm_thread
:param `torch.Tensor` x: dtype=torch.float, device=CUDA, shape B, 3, N
:returns: torch energy tensor dtype should be float and on same device as x
'''
- _x = (x*self.std).permute(0,2,1).contiguous()
+ _x = (x*self.std).permute(0, 2, 1).contiguous()
energy = openmm_clamped_energy_function.apply(self.openmmplugin, _x, self.clamp)
return energy
@@ -454,8 +499,8 @@ Navigation
\ No newline at end of file
diff --git a/docs/build/_modules/molearn/models/CNN_autoencoder.html b/docs/build/_modules/molearn/models/CNN_autoencoder.html
index d182e00..bd8b707 100644
--- a/docs/build/_modules/molearn/models/CNN_autoencoder.html
+++ b/docs/build/_modules/molearn/models/CNN_autoencoder.html
@@ -1,18 +1,15 @@
-
-
+
molearn.models.CNN_autoencoder — molearn 2.0.1 documentation
-
-
-
-
-
-
-
+
+
+
+
+
@@ -55,27 +52,31 @@ Source code for molearn.models.CNN_autoencoder
def __init__(self, f):
super(ResidualBlock, self).__init__()
- conv_block = [ nn.Conv1d(f,f, 3, stride=1, padding=1, bias=False),
- nn.BatchNorm1d(f),
- nn.ReLU(inplace=True),
- nn.Conv1d(f,f, 3, stride=1, padding=1, bias=False),
- nn.BatchNorm1d(f) ]
+ conv_block = [nn.Conv1d(f, f, 3, stride=1, padding=1, bias=False),
+ nn.BatchNorm1d(f),
+ nn.ReLU(inplace=True),
+ nn.Conv1d(f, f, 3, stride=1, padding=1, bias=False),
+ nn.BatchNorm1d(f)]
self.conv_block = nn.Sequential(*conv_block)
def forward(self, x):
return x + self.conv_block(x)
- #return torch.relu(x + self.conv_block(x)) #earlier runs were with 'return x + self.conv_block(x)' but not an issue (really?)
+ # return torch.relu(x + self.conv_block(x)) #earlier runs were with 'return x + self.conv_block(x)' but not an issue (really?)
+
class To2D(nn.Module):
+
def __init__(self):
super(To2D, self).__init__()
pass
+
def forward(self, x):
- z = torch.nn.functional.adaptive_avg_pool2d(x, output_size=(2,1))
+ z = torch.nn.functional.adaptive_avg_pool2d(x, output_size=(2, 1))
z = torch.sigmoid(z)
return z
+
class From2D(nn.Module):
def __init__(self):
super(From2D, self).__init__()
@@ -88,9 +89,9 @@
Source code for molearn.models.CNN_autoencoder
return x
-
-
-
[docs]class Autoencoder(nn.Module):
+
+[docs]
+class Autoencoder(nn.Module):
'''
This is the autoencoder used in our `Ramaswamy 2021 paper <https://journals.aps.org/prx/abstract/10.1103/PhysRevX.11.011052>`_.
It is largely superseded by :func:`molearn.models.foldingnet.AutoEncoder`.
@@ -152,6 +153,7 @@ Source code for molearn.models.CNN_autoencoder
for m in self.decoder:
x = m(x)
return x
+
@@ -189,8 +191,8 @@ Navigation
\ No newline at end of file
diff --git a/docs/build/_modules/molearn/models/foldingnet.html b/docs/build/_modules/molearn/models/foldingnet.html
index 10ce2f5..9a380a6 100644
--- a/docs/build/_modules/molearn/models/foldingnet.html
+++ b/docs/build/_modules/molearn/models/foldingnet.html
@@ -1,18 +1,15 @@
-
-
+
molearn.models.foldingnet — molearn 2.0.1 documentation
-
-
-
-
-
-
-
+
+
+
+
+
@@ -38,10 +35,10 @@ Navigation
Source code for molearn.models.foldingnet
import torch
-import biobox as bb
from torch import nn
import torch.nn.functional as F
+
def index_points(point_clouds, index):
'''
Given a batch of tensor and index, select sub-tensor.
@@ -73,7 +70,7 @@ Source code for molearn.models.foldingnet
xx = torch.sum(x ** 2, dim=1, keepdim=True) # (B, 1, N)
pairwise_distance = -xx - inner - xx.transpose(2, 1) # (B, 1, N), (B, N, N), (B, N, 1) -> (B, N, N)
- idx = pairwise_distance.topk(k=k, dim=-1)[1] # (B, N, k)
+ idx = pairwise_distance.topk(k=k, dim=-1)[1] # (B, N, k)
return idx
@@ -109,7 +106,7 @@ Source code for molearn.models.foldingnet
'''
Graph based encoder
'''
- def __init__(self, latent_dimension = 2,**kwargs):
+ def __init__(self, latent_dimension=2, **kwargs):
super(Encoder, self).__init__()
self.latent_dimension = latent_dimension
self.conv1 = nn.Conv1d(12, 64, 1)
@@ -143,7 +140,6 @@ Source code for molearn.models.foldingnet
x = F.relu(self.bn2(self.conv2(x)))
x = F.relu(self.bn3(self.conv3(x)))
-
# two consecutive graph layers
x = self.graph_layer1(x)
x = self.graph_layer2(x)
@@ -181,9 +177,9 @@ Source code for molearn.models.foldingnet
:param grids: reshaped 2D grids or intermediam reconstructed point clouds
"""
# concatenate
- #try:
+ # try:
# x = torch.cat([*args], dim=1)
- #except:
+ # except:
# for arg in args:
# print(arg.shape)
# raise
@@ -193,6 +189,7 @@ Source code for molearn.models.foldingnet
return x
+
class Decoder_Layer(nn.Module):
'''
Decoder Module of FoldingNet
@@ -202,14 +199,14 @@ Source code for molearn.models.foldingnet
super(Decoder_Layer, self).__init__()
# Sample the grids in 2D space
- #xx = np.linspace(-0.3, 0.3, 45, dtype=np.float32)
- #yy = np.linspace(-0.3, 0.3, 45, dtype=np.float32)
- #self.grid = np.meshgrid(xx, yy) # (2, 45, 45)
+ # xx = np.linspace(-0.3, 0.3, 45, dtype=np.float32)
+ # yy = np.linspace(-0.3, 0.3, 45, dtype=np.float32)
+ # self.grid = np.meshgrid(xx, yy) # (2, 45, 45)
self.out_points = out_points
self.grid = torch.linspace(-0.5, 0.5, out_points).view(1,-1)
# reshape
- #self.grid = torch.Tensor(self.grid).view(2, -1) # (2, 45, 45) -> (2, 45 * 45)
- assert out_points%in_points==0
+ # self.grid = torch.Tensor(self.grid).view(2, -1) # (2, 45, 45) -> (2, 45 * 45)
+ assert out_points % in_points == 0
self.m = out_points//in_points
self.fold1 = FoldingLayer(in_channel + 1, [512, 512, out_channel])
@@ -234,6 +231,7 @@ Source code for molearn.models.foldingnet
return recon2
+
class Decoder(nn.Module):
'''
Decoder Module of FoldingNet
@@ -244,14 +242,12 @@ Source code for molearn.models.foldingnet
self.latent_dimension = latent_dimension
# Sample the grids in 2D space
- #xx = np.linspace(-0.3, 0.3, 45, dtype=np.float32)
- #yy = np.linspace(-0.3, 0.3, 45, dtype=np.float32)
- #self.grid = np.meshgrid(xx, yy) # (2, 45, 45)
+ # xx = np.linspace(-0.3, 0.3, 45, dtype=np.float32)
+ # yy = np.linspace(-0.3, 0.3, 45, dtype=np.float32)
+ # self.grid = np.meshgrid(xx, yy) # (2, 45, 45)
-
start_out = (out_points//128) +1
-
self.out_points = out_points
self.layer1 = Decoder_Layer(1, start_out, latent_dimension,3*128)
@@ -272,7 +268,9 @@ Source code for molearn.models.foldingnet
return x
-[docs]class AutoEncoder(nn.Module):
+
+[docs]
+class AutoEncoder(nn.Module):
'''
Autoencoder architecture derived from FoldingNet.
'''
@@ -289,10 +287,14 @@ Source code for molearn.models.foldingnet
+
+
if __name__=='__main__':
@@ -334,8 +336,8 @@ Navigation
\ No newline at end of file
diff --git a/docs/build/_modules/molearn/scoring/dope_score.html b/docs/build/_modules/molearn/scoring/dope_score.html
index 0725482..7c51821 100644
--- a/docs/build/_modules/molearn/scoring/dope_score.html
+++ b/docs/build/_modules/molearn/scoring/dope_score.html
@@ -1,18 +1,15 @@
-
-
+
molearn.scoring.dope_score — molearn 2.0.1 documentation
-
-
-
-
-
-
-
+
+
+
+
+
@@ -40,7 +37,7 @@ Source code for molearn.scoring.dope_score
import numpy as np
from copy import deepcopy
-from ..utils import ShutUp, cpu_count, random_string
+from ..utils import ShutUp, random_string
try:
import modeller
from modeller import *
@@ -49,22 +46,25 @@ Source code for molearn.scoring.dope_score
except Exception as e:
print('Error importing modeller: ')
print(e)
-
-from multiprocessing import Pool, Event, get_context
+from multiprocessing import get_context
import os
-[docs]class DOPE_Score:
+
+
+[docs]
+class DOPE_Score:
'''
This class contains methods to calculate dope without saving to save and load PDB files for every structure. Atoms in a biobox coordinate tensor are mapped to the coordinates in the modeller model directly.
'''
+ atom_map = {('ILE', 'CD1'):('ILE', 'CD')}
def __init__(self, mol):
'''
:param biobox.Molecule mol: One example frame to gain access to the topology. Mol will also be used to save a temporary pdb file that will be reloaded in modeller to create the initial modeller Model.
'''
- #set residues names with protonated histidines back to generic HIS name (needed by DOPE score function)
+ # set residues names with protonated histidines back to generic HIS name (needed by DOPE score function)
testH = mol.data["resname"].values
testH[testH == "HIE"] = "HIS"
testH[testH == "HID"] = "HIS"
@@ -73,10 +73,9 @@ Source code for molearn.scoring.dope_score
alternate_residue_names = dict(CSS=('CYX',))
atoms = ' '.join(list(_mol.data['name'].unique()))
- #tmp_file = f'tmp{np.random.randint(1e10)}.pdb'
tmp_file = f'tmp{random_string()}.pdb'
- _mol.write_pdb(tmp_file, conformations=[0], split_struc = False)
- log.level(0,0,0,0,0)
+ _mol.write_pdb(tmp_file, conformations=[0], split_struc=False)
+ log.level(0, 0, 0, 0, 0)
env = environ()
env.libs.topology.read(file='$(LIB)/top_heav.lib')
env.libs.parameters.read(file='$(LIB)/par.lib')
@@ -86,7 +85,7 @@ Source code for molearn.scoring.dope_score
atom_residue = _mol.get_data(columns=['name', 'resname', 'resid'])
atom_order = []
first_index = next(iter(self.fast_ss)).residue.index
- offset = atom_residue[0,2]-first_index
+ offset = atom_residue[0, 2]-first_index
for i, j in enumerate(self.fast_ss):
if i < len(atom_residue):
for j_residue_name in alternate_residue_names.get(j.residue.name, (j.residue.name,)):
@@ -95,16 +94,26 @@ Source code for molearn.scoring.dope_score
else:
where_arg = (atom_residue==(np.array([j.name, j_residue_name, j.residue.index+offset], dtype=object))).all(axis=1)
where = np.where(where_arg)[0]
+ if len(where)==0:
+ if (j_residue_name, j.name) in self.atom_map:
+ alt_residue_name, alt_name = self.atom_map[(j_residue_name, j.name)]
+ where_arg = (atom_residue==(np.array([alt_name, alt_residue_name, j.residue.index+offset], dtype=object))).all(axis=1)
+ where = np.where(where_arg)[0]
+ else:
+ print(f'Cant find {j.name} in the atoms {atom_residue[atom_residue[:,2]==j.residue.index+offset]} try adding a mapping to DOPE_Score.atom_map')
atom_order.append(int(where))
self.fast_atom_order = atom_order
# check fast dope atoms
- for i,j in enumerate(self.fast_ss):
+ reverse_map = {value:key for key, value in self.atom_map.items()}
+ for i, j in enumerate(self.fast_ss):
if i<len(atom_residue):
- assert _mol.data['name'][atom_order[i]]==j.name
+ assert _mol.data['name'][atom_order[i]]==j.name or reverse_map[(_mol.data['resname'][atom_order[i]], _mol.data['name'][atom_order[i]])][1]==j.name
self.cg = ConjugateGradients()
os.remove(tmp_file)
-[docs] def get_dope(self, frame, refine=False):
+
+[docs]
+ def get_dope(self, frame, refine=False):
'''
Get the dope score. Injects coordinates into modeller and uses `mdl.build(build_method='INTERNAL_COORDINATES', initialize_xyz=False)` to reconstruct missing atoms.
If a error is thrown by modeller or at any stage, we just return a fixed large value of 1e10.
@@ -114,6 +123,7 @@ Source code for molearn.scoring.dope_score
:returns: Dope score as calculated by modeller. If error is thrown we just simply return 1e10.
:rtype: float
'''
+
# expect coords to be shape [N, 3] use .cpu().numpy().copy() before passing here and make sure it is scaled correctly
try:
frame = frame.astype(float)
@@ -135,10 +145,13 @@ Source code for molearn.scoring.dope_score
dope_score = self.fast_fs.assess_dope()
return dope_score
- except:
+ except Exception:
return 1e10
+
-[docs] def get_all_dope(self, coords, refine=False):
+
+[docs]
+ def get_all_dope(self, coords, refine=False):
'''
Expect a array of frames. return array of DOPE score value.
@@ -147,6 +160,7 @@ Source code for molearn.scoring.dope_score
:returns: float array shape [B]
:rtype: np.ndarray
'''
+
# expect coords to be shape [B, N, 3] use .cpu().numpy().copy() before passing here and make sure it is scaled correctly
dope_scores = []
for frame in coords:
@@ -162,23 +176,32 @@ Source code for molearn.scoring.dope_score
dope_scores.append(self.fast_fs.assess_dope())
- return np.array(dope_scores)
+ return np.array(dope_scores)
+
+
+
def set_global_score(score, kwargs):
'''
Make score a global variable.
This is used when initializing a multiprocessing process.
'''
+
global worker_dope_score
- worker_dope_score = score(**kwargs)#mol = mol, data_dir=data_dir, **kwargs)
+ worker_dope_score = score(**kwargs) # mol = mol, data_dir=data_dir, **kwargs)
+
def process_dope(coords, kwargs):
'''
Worker function for multiprocessing class
'''
+
return worker_dope_score.get_dope(coords,**kwargs)
-[docs]class Parallel_DOPE_Score():
+
+
+[docs]
+class Parallel_DOPE_Score:
'''
a multiprocessing class to get modeller DOPE scores.
A typical use case would looke like::
@@ -190,9 +213,9 @@ Source code for molearn.scoring.dope_score
.... # DOPE will be calculated asynchronously in background
#to retrieve the results
results = np.array([r.get() for r in results])
-
'''
- def __init__(self, mol, processes=-1, context = 'spawn', **kwargs):
+
+ def __init__(self, mol, processes=-1, context='spawn', **kwargs):
'''
:param biobox.Molecule mol: biobox molecule containing one example frame of the protein to be analysed. This will be passed to DOPE_Score class instances in each thread.
:param int processes: (default: -1) Number of processes argument to pass to multiprocessing.pool. This controls the number of threads created.
@@ -201,28 +224,32 @@ Source code for molearn.scoring.dope_score
# set a number of processes as user desires, capped on number of CPUs
if processes > 0:
- processes = min(processes, cpu_count())
+ processes = min(processes, os.cpu_count())
else:
- processes = cpu_count()
+ processes = os.cpu_count()
self.processes = processes
self.mol = deepcopy(mol)
score = DOPE_Score
ctx = get_context(context)
self.pool = ctx.Pool(processes=processes, initializer=set_global_score,
initargs=(score, dict(mol=mol)),
- **kwargs,
- )
+ **kwargs)
self.process_function = process_dope
def __reduce__(self):
return (self.__class__, (self.mol, self.processes))
-[docs] def get_score(self, coords, **kwargs):
+
+[docs]
+ def get_score(self, coords, **kwargs):
'''
:param np.array coords: # shape (N, 3) numpy array
'''
- #is copy necessary?
- return self.pool.apply_async(self.process_function, (coords.copy(), kwargs))
+
+ # is copy necessary?
+ return self.pool.apply_async(self.process_function, (coords.copy(), kwargs))
+
+
@@ -260,8 +287,8 @@ Navigation
\ No newline at end of file
diff --git a/docs/build/_modules/molearn/scoring/ramachandran_score.html b/docs/build/_modules/molearn/scoring/ramachandran_score.html
index b480e8b..7271dff 100644
--- a/docs/build/_modules/molearn/scoring/ramachandran_score.html
+++ b/docs/build/_modules/molearn/scoring/ramachandran_score.html
@@ -1,18 +1,15 @@
-
-
+
molearn.scoring.ramachandran_score — molearn 2.0.1 documentation
-
-
-
-
-
-
-
+
+
+
+
+
@@ -39,36 +36,41 @@ Navigation
Source code for molearn.scoring.ramachandran_score
import numpy as np
from copy import deepcopy
-from multiprocessing import Pool, Event, get_context
+from multiprocessing import get_context
from scipy.spatial.distance import cdist
from iotbx.data_manager import DataManager
from mmtbx.validation.ramalyze import ramalyze
from scitbx.array_family import flex
-from ..utils import cpu_count, random_string
+from ..utils import random_string
import os
-[docs]class Ramachandran_Score():
+
+
+[docs]
+class Ramachandran_Score:
'''
This class contains methods that use iotbx/mmtbx to calulate the quality of phi and psi values in a protein.
'''
+
def __init__(self, mol, threshold=1e-3):
'''
:param biobox.Molecule mol: One example frame to gain access to the topology. Mol will also be used to save a temporary pdb file that will be reloaded to create the initial iotbx Model.
:param float threshold: (default: 1e-3) Threshold used to determine similarity between biobox.molecule coordinates and iotbx model coordinates. Determine that iotbx model was created successfully.
'''
+
tmp_file = f'rama_tmp{random_string()}.pdb'
- mol.write_pdb(tmp_file, split_struc = False)#'rama_tmp.pdb')
- filename = tmp_file#'rama_tmp.pdb'
+ mol.write_pdb(tmp_file, split_struc=False)
+ filename = tmp_file
self.mol = mol
- self.dm = DataManager(datatypes = ['model'])
+ self.dm = DataManager(datatypes=['model'])
self.dm.process_model_file(filename)
self.model = self.dm.get_model(filename)
- self.score = ramalyze(self.model.get_hierarchy()) # get score to see if this works
+ self.score = ramalyze(self.model.get_hierarchy()) # get score to see if this works
self.shape = self.model.get_sites_cart().as_numpy_array().shape
- #tests
+ # tests
x = self.mol.coordinates[0]
m = self.model.get_sites_cart().as_numpy_array()
assert m.shape == x.shape
@@ -77,15 +79,17 @@ Source code for molearn.scoring.ramachandran_score
assert not np.any(((m-x[self.idxs])>threshold))
os.remove(tmp_file)
-[docs] def get_score(self, coords, as_ratio = False):
+
+[docs]
+ def get_score(self, coords, as_ratio=False):
'''
Given coords (corresponding to self.mol) will calculate Ramachandran scores using cctbux ramalyze module
Returns the counts of number of torsion angles that fall within favored, allowed, and outlier regions and finally the total number of torsion angles analysed.
:param numpy.ndarray coords: shape (N, 3)
:returns: (favored, allowed, outliers, total)
:rtype: tuple of ints
-
'''
+
assert coords.shape == self.shape
self.model.set_sites_cart(flex.vec3_double(coords[self.idxs].astype(np.double)))
self.score = ramalyze(self.model.get_hierarchy())
@@ -96,7 +100,8 @@ Source code for molearn.scoring.ramachandran_score
if as_ratio:
return nf/nt, na/nt, no/nt
else:
- return nf, na, no, nt
+ return nf, na, no, nt
+
@@ -105,17 +110,23 @@ Source code for molearn.scoring.ramachandran_score
make score a global variable
This is used when initializing a multiprocessing process
'''
+
global worker_ramachandran_score
- worker_ramachandran_score = score(**kwargs)#mol = mol, data_dir=data_dir, **kwargs)
+ worker_ramachandran_score = score(**kwargs) # mol = mol, data_dir=data_dir, **kwargs)
+
def process_ramachandran(coords, kwargs):
'''
ramachandran worker
Worker function for multiprocessing class
'''
- return worker_ramachandran_score.get_score(coords,**kwargs)
+
+ return worker_ramachandran_score.get_score(coords, **kwargs)
-[docs]class Parallel_Ramachandran_Score():
+
+
+[docs]
+class Parallel_Ramachandran_Score:
'''
A multiprocessing class to get Ramachandran scores.
A typical use case would looke like::
@@ -143,31 +154,30 @@ Source code for molearn.scoring.ramachandran_score
# set a number of processes as user desires, capped on number of CPUs
if processes > 0:
- processes = min(processes, cpu_count())
+ processes = min(processes, os.cpu_count())
else:
- processes = cpu_count()
+ processes = os.cpu_count()
self.mol = deepcopy(mol)
score = Ramachandran_Score
ctx = get_context('spawn')
self.pool = ctx.Pool(processes=processes, initializer=set_global_score,
- initargs=(score, dict(mol=mol)),
- )
+ initargs=(score, dict(mol=mol)))
self.process_function = process_ramachandran
def __reduce__(self):
return (self.__class__, (self.mol,))
-
-[docs] def get_score(self, coords,**kwargs):
+
+[docs]
+ def get_score(self, coords, **kwargs):
'''
:param coords: # shape (N, 3) numpy array
'''
- #is copy necessary?
- return self.pool.apply_async(self.process_function, (coords.copy(), kwargs))
-
-
+ # is copy necessary?
+ return self.pool.apply_async(self.process_function, (coords.copy(), kwargs))
+
@@ -206,8 +216,8 @@ Navigation
\ No newline at end of file
diff --git a/docs/build/_modules/molearn/trainers/openmm_physics_trainer.html b/docs/build/_modules/molearn/trainers/openmm_physics_trainer.html
index b81d016..288c404 100644
--- a/docs/build/_modules/molearn/trainers/openmm_physics_trainer.html
+++ b/docs/build/_modules/molearn/trainers/openmm_physics_trainer.html
@@ -1,18 +1,15 @@
-
-
+
molearn.trainers.openmm_physics_trainer — molearn 2.0.1 documentation
-
-
-
-
-
-
-
+
+
+
+
+
@@ -42,7 +39,9 @@ Source code for molearn.trainers.openmm_physics_trainer
from .trainer import Trainer
-[docs]class OpenMM_Physics_Trainer(Trainer):
+
+[docs]
+class OpenMM_Physics_Trainer(Trainer):
'''
OpenMM_Physics_Trainer subclasses Trainer and replaces the valid_step and train_step.
An extra 'physics_loss' is calculated using OpenMM and the forces are inserted into backwards pass.
@@ -52,7 +51,9 @@ Source code for molearn.trainers.openmm_physics_trainer
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
-[docs] def prepare_physics(self, physics_scaling_factor=0.1, clamp_threshold = 1e8, clamp=False, start_physics_at=0, **kwargs):
+
+[docs]
+ def prepare_physics(self, physics_scaling_factor=0.1, clamp_threshold=1e8, clamp=False, start_physics_at=0, **kwargs):
'''
Create ``self.physics_loss`` object from :func:`loss_functions.openmm_energy <molearn.loss_functions.openmm_energy>`
Needs ``self.mol``, ``self.std``, and ``self._data.atoms`` to have been set with :func:`Trainer.set_data<molearn.trainer.Trainer.set_data>`
@@ -67,13 +68,15 @@ Source code for molearn.trainers.openmm_physics_trainer
self.start_physics_at = start_physics_at
self.psf = physics_scaling_factor
if clamp:
- clamp_kwargs = dict(max=clamp_threshold, min = -clamp_threshold)
+ clamp_kwargs = dict(max=clamp_threshold, min=-clamp_threshold)
else:
clamp_kwargs = None
- self.physics_loss = openmm_energy(self.mol, self.std, clamp=clamp_kwargs, platform = 'CUDA' if self.device == torch.device('cuda') else 'Reference', atoms = self._data.atoms, **kwargs)
+ self.physics_loss = openmm_energy(self.mol, self.std, clamp=clamp_kwargs, platform='CUDA' if self.device == torch.device('cuda') else 'Reference', atoms=self._data.atoms, **kwargs)
-[docs] def common_physics_step(self, batch, latent):
+
+[docs]
+ def common_physics_step(self, batch, latent):
'''
Called from both :func:`train_step <molearn.trainers.OpenMM_Physics_Trainer.train_step>` and :func:`valid_step <molearn.trainers.OpenMM_Physics_Trainer.valid_step>`.
Takes random interpolations between adjacent samples latent vectors. These are decoded (decoded structures saved as ``self._internal['generated'] = generated if needed elsewhere) and the energy terms calculated with ``self.physics_loss``.
@@ -84,16 +87,19 @@ Source code for molearn.trainers.openmm_physics_trainer
alpha = torch.rand(int(len(batch)//2), 1, 1).type_as(latent)
latent_interpolated = (1-alpha)*latent[:-1:2] + alpha*latent[1::2]
- generated = self.autoencoder.decode(latent_interpolated)[:,:,:batch.size(2)]
+ generated = self.autoencoder.decode(latent_interpolated)[:, :, :batch.size(2)]
self._internal['generated'] = generated
energy = self.physics_loss(generated)
- energy[energy.isinf()]=1e35
+ energy[energy.isinf()] = 1e35
energy = torch.clamp(energy, max=1e34)
energy = energy.nanmean()
- return {'physics_loss':energy}#a if not energy.isinf() else torch.tensor(0.0)}
+ return {'physics_loss':energy} # a if not energy.isinf() else torch.tensor(0.0)}
+
-[docs] def train_step(self, batch):
+
+[docs]
+ def train_step(self, batch):
'''
This method overrides :func:`Trainer.train_step <molearn.trainers.Trainer.train_step>` and adds an additional 'Physics_loss' term.
@@ -115,7 +121,10 @@ Source code for molearn.trainers.openmm_physics_trainer
results['loss'] = final_loss
return results
-[docs] def valid_step(self, batch):
+
+
+[docs]
+ def valid_step(self, batch):
'''
This method overrides :func:`Trainer.valid_step <molearn.trainers.Trainer.valid_step>` and adds an additional 'Physics_loss' term.
@@ -131,10 +140,13 @@ Source code for molearn.trainers.openmm_physics_trainer
results = self.common_step(batch)
results.update(self.common_physics_step(batch, self._internal['encoded']))
- #scale = (self.psf*results['mse_loss'])/(results['physics_loss'] +1e-5)
+ # scale = (self.psf*results['mse_loss'])/(results['physics_loss'] +1e-5)
final_loss = torch.log(results['mse_loss'])+self.psf*torch.log(results['physics_loss'])
results['loss'] = final_loss
- return results
+ return results
+
+
+
if __name__=='__main__':
pass
@@ -175,8 +187,8 @@ Navigation
\ No newline at end of file
diff --git a/docs/build/_modules/molearn/trainers/torch_physics_trainer.html b/docs/build/_modules/molearn/trainers/torch_physics_trainer.html
index 7082761..6a30ff5 100644
--- a/docs/build/_modules/molearn/trainers/torch_physics_trainer.html
+++ b/docs/build/_modules/molearn/trainers/torch_physics_trainer.html
@@ -1,18 +1,15 @@
-
-
+
molearn.trainers.torch_physics_trainer — molearn 2.0.1 documentation
-
-
-
-
-
-
-
+
+
+
+
+
@@ -41,7 +38,10 @@ Source code for molearn.trainers.torch_physics_trainer
from molearn.loss_functions import TorchProteinEnergy
from .trainer import Trainer
-[docs]class Torch_Physics_Trainer(Trainer):
+
+
+[docs]
+class Torch_Physics_Trainer(Trainer):
'''
Torch_Physics_Trainer subclasses Trainer and replaces the valid_step and train_step.
An extra 'physics_loss' (bonds, angles, and torsions) is calculated using pytorch.
@@ -50,16 +50,21 @@ Source code for molearn.trainers.torch_physics_trainer
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
-[docs] def prepare_physics(self, physics_scaling_factor=0.1):
+
+[docs]
+ def prepare_physics(self, physics_scaling_factor=0.1):
'''
Create ``self.physics_loss`` object from :func:`loss_functions.TorchProteinEnergy <molearn.loss_functions.TorchProteinEnergy>`
Needs ``self.std``, ``self._data`` to have been set with :func:`Trainer.set_data <molearn.trainer.Trainer.set_data>`
:param float physics_scaling_factor: (default: 0.1) scaling factor saved to ``self.psf`` that is used in :func: `train_step <molearn.trainers.Torch_Physics_Trainer.train_step>` It will control the relative importance of mse_loss and physics_loss in training.
'''
self.psf = physics_scaling_factor
- self.physics_loss = TorchProteinEnergy(self._data.dataset[0]*self.std, pdb_atom_names = self._data.get_atominfo(), device = self.device, method = 'roll')
+ self.physics_loss = TorchProteinEnergy(self._data.dataset[0]*self.std, pdb_atom_names=self._data.get_atominfo(), device=self.device, method='roll')
-[docs] def common_physics_step(self, batch, latent):
+
+
+[docs]
+ def common_physics_step(self, batch, latent):
'''
Called from both :func:`train_step <molearn.trainers.Torch_Physics_Trainer.train_step>` and :func:`valid_step <molearn.trainers.Torch_Physics_Trainer.valid_step>`.
Takes random interpolations between adjacent samples latent vectors. These are decoded (decoded structures saved as ``self._internal['generated'] = generated if needed elsewhere) and the energy terms calculated with ``self.physics_loss``.
@@ -69,8 +74,8 @@ Source code for molearn.trainers.torch_physics_trainer
'''
alpha = torch.rand(int(len(batch)//2), 1, 1).type_as(latent)
latent_interpolated = (1-alpha)*latent[:-1:2] + alpha*latent[1::2]
- generated = self.autoencoder.decode(latent_interpolated)[:,:,:batch.size(2)]
- bond, angle, torsion = self.physics_loss._roll_bond_angle_torsion_loss(generated*self.std)
+ generated = self.autoencoder.decode(latent_interpolated)[:, :, :batch.size(2)]
+ bond, angle, torsion = self.physics_loss._roll_bond_angle_torsion_loss(generated*self.std)
n = len(generated)
bond/=n
angle/=n
@@ -78,12 +83,14 @@ Source code for molearn.trainers.torch_physics_trainer
_all = torch.tensor([bond, angle, torsion])
_all[_all.isinf()]=1e35
total_physics = _all.nansum()
- #total_physics = torch.nansum(torch.tensor([bond ,angle ,torsion]))
+ # total_physics = torch.nansum(torch.tensor([bond ,angle ,torsion]))
return {'physics_loss':total_physics, 'bond_energy':bond, 'angle_energy':angle, 'torsion_energy':torsion}
-[docs] def train_step(self, batch):
+
+[docs]
+ def train_step(self, batch):
'''
This method overrides :func:`Trainer.train_step <molearn.trainers.Trainer.train_step>` and adds an additional 'Physics_loss' term.
@@ -104,7 +111,10 @@ Source code for molearn.trainers.torch_physics_trainer
results['loss'] = final_loss
return results
-[docs] def valid_step(self, batch):
+
+
+[docs]
+ def valid_step(self, batch):
'''
This method overrides :func:`Trainer.valid_step <molearn.trainers.Trainer.valid_step>` and adds an additional 'Physics_loss' term.
@@ -119,10 +129,12 @@ Source code for molearn.trainers.torch_physics_trainer
'''
results = self.common_step(batch)
results.update(self.common_physics_step(batch, self._internal['encoded']))
- #scale = self.psf*results['mse_loss']/(results['physics_loss']+1e-5)
+ # scale = self.psf*results['mse_loss']/(results['physics_loss']+1e-5)
final_loss = torch.log(results['mse_loss'])+self.psf*torch.log(results['physics_loss'])
results['loss'] = final_loss
- return results
+ return results
+
+
if __name__=='__main__':
@@ -164,8 +176,8 @@ Navigation
\ No newline at end of file
diff --git a/docs/build/_modules/molearn/trainers/trainer.html b/docs/build/_modules/molearn/trainers/trainer.html
index 062b4bf..54ac44a 100644
--- a/docs/build/_modules/molearn/trainers/trainer.html
+++ b/docs/build/_modules/molearn/trainers/trainer.html
@@ -1,18 +1,15 @@
-
-
+
molearn.trainers.trainer — molearn 2.0.1 documentation
-
-
-
-
-
-
-
+
+
+
+
+
@@ -44,14 +41,16 @@ Source code for molearn.trainers.trainer
import time
import torch
from molearn.data import PDBData
-import warnings
-from decimal import Decimal
import json
+
class TrainingFailure(Exception):
pass
-[docs]class Trainer():
+
+
+[docs]
+class Trainer:
'''
Trainer class that defines a number of useful methods for training an autoencoder.
@@ -70,9 +69,7 @@ Source code for molearn.trainers.trainer
'''
-
-
- def __init__(self, device = None, log_filename = 'log_file.dat'):
+ def __init__(self, device=None, log_filename='log_file.dat'):
'''
:param torch.Device device: if not given will be determinined automatically based on torch.cuda.is_available()
:param str log_filename: (default: 'default_log_filename.json') file used to log outputs to
@@ -90,7 +87,9 @@ Source code for molearn.trainers.trainer
self.log_filename = 'default_log_filename.json'
self.scheduler_key = None
-[docs] def get_network_summary(self,):
+
+[docs]
+ def get_network_summary(self):
'''
returns a dictionary containing information about the size of the autoencoder.
'''
@@ -98,15 +97,17 @@ Source code for molearn.trainers.trainer
return sum(p.numel() for p in model.parameters() if (p.requires_grad and trainable_only))
return dict(
- encoder_trainable = get_parameters(True, self.autoencoder.encoder),
- encoder_total = get_parameters(False, self.autoencoder.encoder),
- decoder_trainable = get_parameters(True, self.autoencoder.decoder),
- decoder_total = get_parameters(False, self.autoencoder.decoder),
- autoencoder_trainable = get_parameters(True, self.autoencoder),
- autoencoder_total = get_parameters(False, self.autoencoder),
- )
-
-[docs] def set_autoencoder(self, autoencoder, **kwargs):
+ encoder_trainable=get_parameters(True, self.autoencoder.encoder),
+ encoder_total=get_parameters(False, self.autoencoder.encoder),
+ decoder_trainable=get_parameters(True, self.autoencoder.decoder),
+ decoder_total=get_parameters(False, self.autoencoder.decoder),
+ autoencoder_trainable=get_parameters(True, self.autoencoder),
+ autoencoder_total=get_parameters(False, self.autoencoder))
+
+
+
+[docs]
+ def set_autoencoder(self, autoencoder, **kwargs):
'''
:param autoencoder: (:func:`autoencoder <molearn.models>`,) torch network class that implements ``autoencoder.encode``, and ``autoencoder.decode``. Please pass the class not the instance
:param \*\*kwargs: any other kwargs given to this method will be used to initialise the network ``self.autoencoder = autoencoder(**kwargs)``
@@ -117,7 +118,10 @@ Source code for molearn.trainers.trainer
self.autoencoder = autoencoder.to(self.device)
self._autoencoder_kwargs = kwargs
-[docs] def set_dataloader(self, train_dataloader=None, valid_dataloader=None):
+
+
+[docs]
+ def set_dataloader(self, train_dataloader=None, valid_dataloader=None):
'''
:param torch.DataLoader train_dataloader: Alternatively set using ``trainer.train_dataloader = dataloader``
:param torch.DataLoader valid_dataloader: Alternatively set using ``trainer.valid_dataloader = dataloader``
@@ -127,7 +131,10 @@ Source code for molearn.trainers.trainer
if valid_dataloader is not None:
self.valid_dataloader = valid_dataloader
-[docs] def set_data(self, data, **kwargs):
+
+
+[docs]
+ def set_data(self, data, **kwargs):
'''
Sets up internal variables and gives trainer access to dataloaders.
``self.train_dataloader``, ``self.valid_dataloader``, ``self.std``, ``self.mean``, ``self.mol`` will all be obtained from this object.
@@ -146,7 +153,9 @@ Source code for molearn.trainers.trainer
self._data = data
-[docs] def prepare_optimiser(self, lr = 1e-3, weight_decay = 0.0001, **optimiser_kwargs):
+
+[docs]
+ def prepare_optimiser(self, lr=1e-3, weight_decay=0.0001, **optimiser_kwargs):
'''
The Default optimiser is ``AdamW`` and is saved in ``self.optimiser``.
With no optional arguments this function is the same as doing:
@@ -156,9 +165,12 @@ Source code for molearn.trainers.trainer
:param float weight_decay: (default: 0.0001) optimiser weight_decay
:param \*\*optimiser_kwargs: other kwargs that are passed onto AdamW
'''
- self.optimiser = torch.optim.AdamW(self.autoencoder.parameters(), lr=lr, weight_decay = weight_decay, **optimiser_kwargs)
+ self.optimiser = torch.optim.AdamW(self.autoencoder.parameters(), lr=lr, weight_decay=weight_decay, **optimiser_kwargs)
-[docs] def log(self, log_dict, verbose=None):
+
+
+[docs]
+ def log(self, log_dict, verbose=None):
'''
Then contents of log_dict are dumped using ``json.dumps(log_dict)`` and printed and/or appended to ``self.log_filename``
This function is called from :func:`self.run <molearn.trainers.Trainer.run>`
@@ -173,7 +185,10 @@ Source code for molearn.trainers.trainer
with open(self.log_filename, 'a') as f:
f.write(dump+'\n')
-[docs] def scheduler_step(self, logs):
+
+
+[docs]
+ def scheduler_step(self, logs):
'''
This function does nothing. It is called after :func:`self.valid_epoch <molearn.trainers.Trainer.valid_epoch>` in :func:`Trainer.run() <molearn.trainers.Trainer.run>` and before :func:`checkpointing <molearn.trainers.Trainer.checkpoint>`. It is designed to be overridden if you wish to use a scheduler.
@@ -181,7 +196,10 @@ Source code for molearn.trainers.trainer