diff --git a/README.md b/README.md index 7963f7d..46b4e3e 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,8 @@ sys.path.insert(0, 'path/to/molearn/src') import molearn ``` +> **Note** +> in case of installation issues, please consult our FAQ [molearn.readthedocs.io](https://molearn.readthedocs.io/en/latest/FAQ.html) ## Usage ## diff --git a/docs/build/.buildinfo b/docs/build/.buildinfo index 257c332..0e86985 100644 --- a/docs/build/.buildinfo +++ b/docs/build/.buildinfo @@ -1,4 +1,4 @@ # Sphinx build info version 1 # This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. -config: 3a4be983efd1ad1578e3aba1f56753bc +config: fd34726b2b2ff5441545b10d48d8c981 tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/build/.doctrees/analysis.doctree b/docs/build/.doctrees/analysis.doctree index 93e364c..8e88fe4 100644 Binary files a/docs/build/.doctrees/analysis.doctree and b/docs/build/.doctrees/analysis.doctree differ diff --git a/docs/build/.doctrees/data.doctree b/docs/build/.doctrees/data.doctree index 0e78a2e..cb3babe 100644 Binary files a/docs/build/.doctrees/data.doctree and b/docs/build/.doctrees/data.doctree differ diff --git a/docs/build/.doctrees/environment.pickle b/docs/build/.doctrees/environment.pickle index 91b4dfc..ed1bb00 100644 Binary files a/docs/build/.doctrees/environment.pickle and b/docs/build/.doctrees/environment.pickle differ diff --git a/docs/build/.doctrees/faq.doctree b/docs/build/.doctrees/faq.doctree index 03910b2..21fceec 100644 Binary files a/docs/build/.doctrees/faq.doctree and b/docs/build/.doctrees/faq.doctree differ diff --git a/docs/build/.doctrees/index.doctree b/docs/build/.doctrees/index.doctree index c8e13b2..cffc055 100644 Binary files a/docs/build/.doctrees/index.doctree and b/docs/build/.doctrees/index.doctree differ diff --git a/docs/build/.doctrees/loss_functions.doctree b/docs/build/.doctrees/loss_functions.doctree index b550835..acdcf93 100644 Binary files 
a/docs/build/.doctrees/loss_functions.doctree and b/docs/build/.doctrees/loss_functions.doctree differ diff --git a/docs/build/.doctrees/models.doctree b/docs/build/.doctrees/models.doctree index ce5c2ee..395a795 100644 Binary files a/docs/build/.doctrees/models.doctree and b/docs/build/.doctrees/models.doctree differ diff --git a/docs/build/.doctrees/scoring.doctree b/docs/build/.doctrees/scoring.doctree index 831829c..c025d9e 100644 Binary files a/docs/build/.doctrees/scoring.doctree and b/docs/build/.doctrees/scoring.doctree differ diff --git a/docs/build/.doctrees/trainers.doctree b/docs/build/.doctrees/trainers.doctree index 1dfd005..7abbbf7 100644 Binary files a/docs/build/.doctrees/trainers.doctree and b/docs/build/.doctrees/trainers.doctree differ diff --git a/docs/build/_modules/index.html b/docs/build/_modules/index.html index ffc1175..b495099 100644 --- a/docs/build/_modules/index.html +++ b/docs/build/_modules/index.html @@ -1,18 +1,15 @@ - - + Overview: module code — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -84,8 +81,8 @@

Navigation

\ No newline at end of file diff --git a/docs/build/_modules/molearn/analysis/GUI.html b/docs/build/_modules/molearn/analysis/GUI.html index 16aead1..e51c40e 100644 --- a/docs/build/_modules/molearn/analysis/GUI.html +++ b/docs/build/_modules/molearn/analysis/GUI.html @@ -1,18 +1,15 @@ - - + molearn.analysis.GUI — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -71,7 +68,9 @@

Source code for molearn.analysis.GUI

 from ..utils import as_numpy
 
 
-
[docs]class MolearnGUI(object): +
+[docs] +class MolearnGUI: ''' This class produces an interactive visualisation for data stored in a :func:`MolearnAnalysis <molearn.analysis.MolearnAnalysis>` object, @@ -88,11 +87,10 @@

Source code for molearn.analysis.GUI

         else:
             self.MA = MA
 
-        self.waypoints = [] # collection of all saved waypoints
-        self.samples = [] # collection of all calculated sampling points
+        self.waypoints = []  # collection of all saved waypoints
+        self.samples = []    # collection of all calculated sampling points
         
         self.run()
-
      
     def update_trails(self):
         '''
@@ -108,14 +106,14 @@ 

Source code for molearn.analysis.GUI

 
         # update latent space plot
         if len(self.samples) == 0:
-            self.latent.data[2].x = self.waypoints[:, 0]
-            self.latent.data[2].y = self.waypoints[:, 1]
+            if len(self.waypoints)>0:
+                self.latent.data[2].x = self.waypoints[:, 0]
+                self.latent.data[2].y = self.waypoints[:, 1]
         else:
             self.latent.data[2].x = self.samples[:, 0]
             self.latent.data[2].y = self.samples[:, 1]
         
         self.latent.update()
-
     
     def on_click(self, trace, points, selector):
         '''
@@ -136,14 +134,13 @@ 

Source code for molearn.analysis.GUI

         # update textbox (triggering update of 3D representation)
         try:
             pt = self.waypoints.flatten().round(decimals=4).astype(str)
-            #pt = np.array([self.latent.data[3].x, self.latent.data[3].y]).T.flatten().round(decimals=4).astype(str)
+            # pt = np.array([self.latent.data[3].x, self.latent.data[3].y]).T.flatten().round(decimals=4).astype(str)
             self.mybox.value = " ".join(pt)
         except Exception:
             return
 
         self.update_trails()    
         
-
     def get_samples(self, mybox, samplebox, path):
         '''
         provide a trail of point between list of waypoints, either connected
@@ -159,8 +156,8 @@ 

Source code for molearn.analysis.GUI

             crd = np.array(mybox.split()).astype(float)
             crd = crd.reshape((int(len(crd)/2), 2))
         except Exception:
-           raise Exception("Cannot define sampling points")
-           return
+            raise Exception("Cannot define sampling points")
+            return
     
         if use_path:
             # connect points via A*
@@ -168,8 +165,8 @@ 

Source code for molearn.analysis.GUI

                 landscape = self.latent.data[0].z
                 crd = get_path_aggregate(crd, landscape.T, self.MA.xvals, self.MA.yvals)
             except Exception as e:
-               raise Exception(f"Cannot define sampling points: path finding failed. {e})")
-               return
+                raise Exception(f"Cannot define sampling points: path finding failed. {e})")
+                return
                                          
         else:
             # connect points via straight line
@@ -180,7 +177,6 @@ 

Source code for molearn.analysis.GUI

                 return
 
         return crd
-
         
     def interact_3D(self, mybox, samplebox, path):
         '''
@@ -191,7 +187,7 @@ 

Source code for molearn.analysis.GUI

             crd = self.get_samples(mybox, samplebox, path)
             self.samples = crd.copy()
             crd = crd.reshape((1, len(crd), 2))
-        except:
+        except Exception:
             self.button_pdb.disabled = True
             return
 
@@ -208,12 +204,11 @@ 

Source code for molearn.analysis.GUI

         self.mymol.load_new(gen)
         view = nv.show_mdanalysis(self.mymol)
         view.add_representation("spacefill")
-        #view.add_representation("cartoon")
+        # view.add_representation("cartoon")
         display.display(view)
 
         self.button_pdb.disabled = False
 
-
     def drop_background_event(self, change):
         '''
         control colouring style of latent space surface
@@ -225,7 +220,7 @@ 

Source code for molearn.analysis.GUI

             mykey = change.new
    
         try:
-           data = self.MA.surfaces[mykey]
+            data = self.MA.surfaces[mykey]
         except Exception as e:
             print(f"{e}")
             return      
@@ -243,7 +238,7 @@ 

Source code for molearn.analysis.GUI

             self.latent.data[0].zmax = np.max(data)
             self.block0.children[1].min = np.min(data)
             self.block0.children[1].max = np.max(data)
-        except:
+        except Exception:
             self.latent.data[0].zmax = np.max(data)
             self.latent.data[0].zmin = np.min(data)
             self.block0.children[1].max = np.max(data)
@@ -253,7 +248,6 @@ 

Source code for molearn.analysis.GUI

         
         self.update_trails()
 
-
     def drop_dataset_event(self, change):
         '''
         control which dataset is displayed
@@ -265,7 +259,7 @@ 

Source code for molearn.analysis.GUI

             
         else:
             try:
-               data = as_numpy(self.MA.get_encoded(change.new).squeeze(2))
+                data = as_numpy(self.MA.get_encoded(change.new).squeeze(2))
             except Exception as e:
                 print(f"{e}")
                 return      
@@ -277,7 +271,6 @@ 

Source code for molearn.analysis.GUI

         
         self.latent.update()
 
-
     def drop_path_event(self, change):
         '''
         control way paths are looked for
@@ -290,7 +283,6 @@ 

Source code for molearn.analysis.GUI

             
         self.update_trails()
 
-
     def range_slider_event(self, change):
         '''
         update surface colouring upon manipulation of range slider
@@ -300,7 +292,6 @@ 

Source code for molearn.analysis.GUI

         self.latent.data[0].zmax = change.new[1]
         self.latent.update()
 
-
     def trail_update_event(self, change):
         '''
         update trails (waypoints and way they are connected)
@@ -309,7 +300,7 @@ 

Source code for molearn.analysis.GUI

         try:
             crd = np.array(self.mybox.value.split()).astype(float)
             crd = crd.reshape((int(len(crd)/2), 2))
-        except:
+        except Exception:
             self.button_pdb.disabled = False
             return
 
@@ -317,7 +308,6 @@ 

Source code for molearn.analysis.GUI

 
         self.update_trails()
 
-
     def button_pdb_event(self, check):
         '''
         save PDB file corresponding to the interpolation shown in the 3D view
@@ -346,7 +336,6 @@ 

Source code for molearn.analysis.GUI

             for ts in self.mymol.trajectory:
                 W.write(protein)
 
-
     def button_save_state_event(self, check):
         '''
         save class state
@@ -360,8 +349,7 @@ 

Source code for molearn.analysis.GUI

         if fname == "":
             return
 
-        pickle.dump([self.MA, self.waypoints], open( fname, "wb" ) )
-
+        pickle.dump([self.MA, self.waypoints], open(fname, "wb"))
 
     def button_load_state_event(self, check):
         '''
@@ -377,7 +365,7 @@ 

Source code for molearn.analysis.GUI

             return
 
         try:
-            self.MA, self.waypoints = pickle.load( open( fname, "rb" ) )
+            self.MA, self.waypoints = pickle.load(open(fname, "rb"))
             self.run()
         except Exception as e:
             raise Exception(f"Cannot load state file. {e}")
@@ -388,7 +376,7 @@ 

Source code for molearn.analysis.GUI

 
         # create an MDAnalysis instance of input protein (for viewing purposes)
         if hasattr(self.MA, "mol"):
-            self.MA.mol.write_pdb("tmp.pdb", conformations=[0], split_struc = False)
+            self.MA.mol.write_pdb("tmp.pdb", conformations=[0], split_struc=False)
             self.mymol = mda.Universe('tmp.pdb')
         
         ### MENU ITEMS ###
@@ -415,7 +403,6 @@ 

Source code for molearn.analysis.GUI

         
         self.drop_background.observe(self.drop_background_event, names='value')
 
-
         # dataset selector dropdown menu
         options2 = ["none"]
         if self.MA is not None:
@@ -445,7 +432,6 @@ 

Source code for molearn.analysis.GUI

 
         self.drop_path.observe(self.drop_path_event, names='value')
 
-
         # text box holding current coordinates
         self.mybox = widgets.Textarea(placeholder='coordinates',
                                  description='crds:',
@@ -460,7 +446,6 @@ 

Source code for molearn.analysis.GUI

 
         self.samplebox.observe(self.trail_update_event, names='value')
 
-
         # button to save PDB file
         self.button_pdb = widgets.Button(
             description='Save PDB',
@@ -468,23 +453,20 @@ 

Source code for molearn.analysis.GUI

 
         self.button_pdb.on_click(self.button_pdb_event)
 
-
         # button to save state file
         self.button_save_state = widgets.Button(
-            description= 'Save state',
+            description='Save state',
             disabled=False, layout=Layout(flex='1 1 0%', width='auto'))
 
         self.button_save_state.on_click(self.button_save_state_event)
 
-
         # button to load state file
         self.button_load_state = widgets.Button(
-            description= 'Load state',
+            description='Load state',
             disabled=False, layout=Layout(flex='1 1 0%', width='auto'))
 
         self.button_load_state.on_click(self.button_load_state_event)
 
-
         # latent space range slider
         self.range_slider = widgets.FloatRangeSlider(
             description='cmap range:',
@@ -502,8 +484,7 @@ 

Source code for molearn.analysis.GUI

             
         if self.waypoints == []:
             self.button_pdb.disabled = True
-
-        
+  
         ### LATENT SPACE REPRESENTATION ###
 
         # surface 
@@ -541,7 +522,7 @@ 

Source code for molearn.analysis.GUI

 
         # path
         plot3 = go.Scatter(x=np.array([]), y=np.array([]),
-                   showlegend=False, opacity=0.9, mode = 'lines+markers',
+                   showlegend=False, opacity=0.9, mode='lines+markers',
                    marker=dict(color='red', size=4))
 
         self.latent = go.FigureWidget([plot1, plot2, plot3])
@@ -560,7 +541,7 @@ 

Source code for molearn.analysis.GUI

             try:
                 self.range_slider.min = scmin
                 self.range_slider.max = scmax
-            except:
+            except Exception:
                 self.range_slider.max = scmax
                 self.range_slider.min = scmin
 
@@ -569,8 +550,7 @@ 

Source code for molearn.analysis.GUI

 
         # 3D protein representation (triggered by update of textbox, sampling box, or pathfinding method)
         self.protein = widgets.interactive_output(self.interact_3D, {'mybox': self.mybox, 'samplebox': self.samplebox, 'path': self.drop_path})
-
-        
+     
         ### WIDGETS ARRANGEMENT ###
         
         self.block0 = widgets.VBox([self.drop_dataset, self.range_slider,
@@ -594,7 +574,7 @@ 

Source code for molearn.analysis.GUI

 
         display.clear_output(wait=True)
         display.display(self.scene)
- +
@@ -632,8 +612,8 @@

Navigation

\ No newline at end of file diff --git a/docs/build/_modules/molearn/analysis/analyser.html b/docs/build/_modules/molearn/analysis/analyser.html index d8d6668..6236727 100644 --- a/docs/build/_modules/molearn/analysis/analyser.html +++ b/docs/build/_modules/molearn/analysis/analyser.html @@ -1,18 +1,15 @@ - - + molearn.analysis.analyser — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -60,7 +57,16 @@

Source code for molearn.analysis.analyser

     print('Error importing modeller: ')
     print(e)
     
-from ..scoring import Parallel_DOPE_Score, Parallel_Ramachandran_Score
+try:
+    from ..scoring import Parallel_DOPE_Score
+except ImportError as e:
+    print('Import Error captured while trying to import Parallel_DOPE_Score, it is likely that you dont have Modeller installed')
+    print(e)
+try:
+    from ..scoring import Parallel_Ramachandran_Score
+except ImportError as e:
+    print('Import Error captured while trying to import Parallel_Ramachandran_Score, it is likely that you dont have cctbx/iotbx installed')
+    print(e)
 from ..data import PDBData
 
 from ..utils import as_numpy
@@ -69,7 +75,9 @@ 

Source code for molearn.analysis.analyser

 warnings.filterwarnings("ignore")
 
 
-
[docs]class MolearnAnalysis(object): +
+[docs] +class MolearnAnalysis: ''' This class provides methods dedicated to the quality analysis of a trained model. @@ -83,7 +91,9 @@

Source code for molearn.analysis.analyser

         self.batch_size = 1
         self.processes = 1
 
-
[docs] def set_network(self, network): +
+[docs] + def set_network(self, network): ''' :param network: a trained neural network defined in :func:`molearn.models <molearn.models>` ''' @@ -91,13 +101,19 @@

Source code for molearn.analysis.analyser

         self.network.eval()
         self.device = next(network.parameters()).device
-
[docs] def get_dataset(self, key): + +
+[docs] + def get_dataset(self, key): ''' :param str key: key pointing to a dataset previously loaded with :func:`set_dataset <molearn.analysis.MolearnAnalysis.set_dataset>` ''' return self._datasets[key]
-
[docs] def set_dataset(self, key, data, atomselect="*"): + +
+[docs] + def set_dataset(self, key, data, atomselect="*"): ''' :param data: :func:`PDBData <molearn.data.PDBData>` object containing atomic coordinates :param str key: label to be associated with data @@ -127,7 +143,10 @@

Source code for molearn.analysis.analyser

         if not hasattr(self, 'shape'):
             self.shape = (_data.dataset.shape[1], _data.dataset.shape[2])
-
[docs] def get_encoded(self, key): + +
+[docs] + def get_encoded(self, key): ''' :param str key: key pointing to a dataset previously loaded with :func:`set_dataset <molearn.analysis.MolearnAnalysis.set_dataset>` :return: array containing the encoding in latent space of dataset associated with key @@ -148,13 +167,19 @@

Source code for molearn.analysis.analyser

                 
         return self._encoded[key]
-
[docs] def set_encoded(self, key, coords): + +
+[docs] + def set_encoded(self, key, coords): ''' :param str key: key pointing to a dataset previously loaded with :func:`set_dataset <molearn.analysis.MolearnAnalysis.set_dataset>` ''' self._encoded[key] = torch.tensor(coords).float()
-
[docs] def get_decoded(self, key): + +
+[docs] + def get_decoded(self, key): ''' :param str key: key pointing to a dataset previously loaded with :func:`set_dataset <molearn.analysis.MolearnAnalysis.set_dataset>` ''' @@ -164,23 +189,32 @@

Source code for molearn.analysis.analyser

                 encoded = self.get_encoded(key)
                 decoded = torch.empty(encoded.shape[0], *self.shape).float()
                 for i in tqdm(range(0, encoded.shape[0], batch_size), desc=f'Decoding {key}'):
-                    decoded[i:i+batch_size] = self.network.decode(encoded[i:i+batch_size].to(self.device))[:,:,:self.shape[1]].cpu()
+                    decoded[i:i+batch_size] = self.network.decode(encoded[i:i+batch_size].to(self.device))[:, :, :self.shape[1]].cpu()
                 self._decoded[key] = decoded
         return self._decoded[key]
-
[docs] def set_decoded(self, key, structures): + +
+[docs] + def set_decoded(self, key, structures): ''' :param str key: key pointing to a dataset previously loaded with :func:`set_dataset <molearn.analysis.MolearnAnalysis.set_dataset>` ''' self._decoded[key] = structures
-
[docs] def num_trainable_params(self): + +
+[docs] + def num_trainable_params(self): ''' :return: number of trainable parameters in the neural network previously loaded with :func:`set_dataset <molearn.analysis.MolearnAnalysis.set_network>` ''' return sum(p.numel() for p in self.network.parameters() if p.requires_grad)
-
[docs] def get_error(self, key, align=False): + +
+[docs] + def get_error(self, key, align=True): ''' Calculate the reconstruction error of a dataset encoded and decoded by a trained neural network. @@ -196,21 +230,24 @@

Source code for molearn.analysis.analyser

         m = deepcopy(self.mol)
         for i in range(dataset.shape[0]):
             crd_ref = as_numpy(dataset[i].permute(1,0).unsqueeze(0))*self.stdval + self.meanval
-            crd_mdl = as_numpy(decoded[i].permute(1,0).unsqueeze(0))[:, :dataset.shape[2]]*self.stdval + self.meanval #clip the padding of models  
-            if align: # use Molecule Biobox class to calculate RMSD
+            crd_mdl = as_numpy(decoded[i].permute(1,0).unsqueeze(0))[:, :dataset.shape[2]]*self.stdval + self.meanval  # clip the padding of models  
+            # use Molecule Biobox class to calculate RMSD
+            if align:
                 m.coordinates = deepcopy(crd_ref)
                 m.set_current(0)
                 m.add_xyz(crd_mdl[0])
                 rmsd = m.rmsd(0, 1)
             else:
-                rmsd = np.sqrt(np.sum((crd_ref.flatten()-crd_mdl.flatten())**2)/crd_mdl.shape[1]) # Cartesian L2 norm
+                rmsd = np.sqrt(np.sum((crd_ref.flatten()-crd_mdl.flatten())**2)/crd_mdl.shape[1])  # Cartesian L2 norm
 
             err.append(rmsd)
 
         return np.array(err)
-
[docs] def get_dope(self, key, refine=True, **kwargs): +
+[docs] + def get_dope(self, key, refine=True, **kwargs): ''' :param str key: key pointing to a dataset previously loaded with :func:`set_dataset <molearn.analysis.MolearnAnalysis.set_dataset>` :param bool refine: if True, refine structures before calculating DOPE score @@ -219,13 +256,16 @@

Source code for molearn.analysis.analyser

         dataset = self.get_dataset(key)
         decoded = self.get_decoded(key)
         
-        dope_dataset = self.get_all_dope_score(dataset, refine=refine,**kwargs)
-        dope_decoded = self.get_all_dope_score(decoded, refine=refine,**kwargs)
+        dope_dataset = self.get_all_dope_score(dataset, refine=refine, **kwargs)
+        dope_decoded = self.get_all_dope_score(decoded, refine=refine, **kwargs)
+
+        return dict(dataset_dope=dope_dataset, 
+                    decoded_dope=dope_decoded)
- return dict(dataset_dope = dope_dataset, - decoded_dope = dope_decoded)
-
[docs] def get_ramachandran(self, key): +
+[docs] + def get_ramachandran(self, key): ''' :param str key: key pointing to a dataset previously loaded with :func:`set_dataset <molearn.analysis.MolearnAnalysis.set_dataset>` ''' @@ -237,7 +277,10 @@

Source code for molearn.analysis.analyser

         ramachandran.update({f'decoded_{key}':value for key, value in self.get_all_ramachandran_score(decoded).items()})
         return ramachandran
-
[docs] def setup_grid(self, samples=64, bounds_from=None, bounds=None, padding=0.1): + +
+[docs] + def setup_grid(self, samples=64, bounds_from=None, bounds=None, padding=0.1): ''' Define a NxN point grid regularly sampling the latent space. @@ -252,7 +295,7 @@

Source code for molearn.analysis.analyser

             if bounds_from is None:
                 bounds_from = "all"
             
-            bounds = self._get_bounds(bounds_from, exclude = key)
+            bounds = self._get_bounds(bounds_from, exclude=key)
         
         bx = (bounds[1]-bounds[0])*padding
         by = (bounds[3]-bounds[2])*padding
@@ -260,12 +303,13 @@ 

Source code for molearn.analysis.analyser

         self.yvals = np.linspace(bounds[2]-by, bounds[3]+by, samples)
         self.n_samples = samples
         meshgrid = np.meshgrid(self.xvals, self.yvals)
-        stack = np.stack(meshgrid, axis=2).reshape(-1,1,2)
+        stack = np.stack(meshgrid, axis=2).reshape(-1, 1, 2)
         self.set_encoded(key, stack)
         
         return key
- def _get_bounds(self, bounds_from, exclude = ['grid', 'grid_decoded']): + + def _get_bounds(self, bounds_from, exclude=['grid', 'grid_decoded']): ''' :param bounds_from: keys of datasets to be considered for identification of boundaries in latent space :param exclude: keys of dataset not to consider @@ -282,16 +326,18 @@

Source code for molearn.analysis.analyser

         xmin, ymin, xmax, ymax = [], [], [], []
         for key in bounds_from:
             z = self.get_encoded(key)
-            xmin.append(z[:,0].min())
-            ymin.append(z[:,1].min())
-            xmax.append(z[:,0].max())
-            ymax.append(z[:,1].max())
+            xmin.append(z[:, 0].min())
+            ymin.append(z[:, 1].min())
+            xmax.append(z[:, 0].max())
+            ymax.append(z[:, 1].max())
             
         xmin, ymin = min(xmin), min(ymin)
         xmax, ymax = max(xmax), max(ymax)
         return xmin, xmax, ymin, ymax
 
-
[docs] def scan_error_from_target(self, key, index=None, align=False): +
+[docs] + def scan_error_from_target(self, key, index=None, align=True): ''' Calculate landscape of RMSD vs single target structure. Target should be previously loaded datset containing a single conformation. @@ -315,19 +361,22 @@

Source code for molearn.analysis.analyser

             
             decoded = self.get_decoded('grid')
             if align:
-                crd_ref = as_numpy(target.permute(0,2,1))*self.stdval
-                crd_mdl = as_numpy(decoded.permute(0,2,1))*self.stdval
+                crd_ref = as_numpy(target.permute(0, 2, 1))*self.stdval
+                crd_mdl = as_numpy(decoded.permute(0, 2, 1))*self.stdval
                 m = deepcopy(self.mol)
                 m.coordinates = np.concatenate([crd_ref, crd_mdl])
                 m.set_current(0)
-                rmsd = np.array([m.rmsd(0,i) for i in range(1, len(m.coordinates))])
+                rmsd = np.array([m.rmsd(0, i) for i in range(1, len(m.coordinates))])
             else:
                 rmsd = (((decoded-target)*self.stdval)**2).sum(axis=1).mean(axis=-1).sqrt()
-            self.surfaces[s_key] = rmsd.reshape(self.n_samples, self.n_samples).numpy()
+            self.surfaces[s_key] = as_numpy(rmsd.reshape(self.n_samples, self.n_samples))
             
         return self.surfaces[s_key], self.xvals, self.yvals
-
[docs] def scan_error(self, s_key='Network_RMSD', z_key='Network_z_drift'): + +
+[docs] + def scan_error(self, s_key='Network_RMSD', z_key='Network_z_drift'): ''' Calculate RMSD and z-drift on a grid sampling the latent space. Requires a grid system to be defined via a prior call to :func:`set_dataset <molearn.analysis.MolearnAnalysis.setup_grid>`. @@ -343,30 +392,32 @@

Source code for molearn.analysis.analyser

         z_key = 'Network_z_drift'
         if s_key not in self.surfaces:
             assert 'grid' in self._encoded, 'make sure to call MolearnAnalysis.setup_grid first'
-            decoded = self.get_decoded('grid')           # decode grid 
-            #self.set_dataset('grid_decoded', decoded)    # add back as dataset w. different name
+            decoded = self.get_decoded('grid')            # decode grid 
+            # self.set_dataset('grid_decoded', decoded)   # add back as dataset w. different name
             self._datasets['grid_decoded'] = decoded
-            decoded_2 = self.get_decoded('grid_decoded') # encode, and decode a second time
-            grid = self.get_encoded('grid')              # retrieve original grid
-            grid_2 = self.get_encoded('grid_decoded')    # retrieve decoded encoded grid
+            decoded_2 = self.get_decoded('grid_decoded')  # encode, and decode a second time
+            grid = self.get_encoded('grid')               # retrieve original grid
+            grid_2 = self.get_encoded('grid_decoded')     # retrieve decoded encoded grid
 
             rmsd = (((decoded-decoded_2)*self.stdval)**2).sum(axis=1).mean(axis=-1).sqrt()
             z_drift = ((grid-grid_2)**2).mean(axis=2).mean(axis=1).sqrt()
 
             self.surfaces[s_key] = rmsd.reshape(self.n_samples, self.n_samples).numpy()
             self.surfaces[z_key] = z_drift.reshape(self.n_samples, self.n_samples).numpy()
+            
         return self.surfaces[s_key], self.surfaces[z_key], self.xvals, self.yvals
+ def _ramachandran_score(self, frame): ''' returns multiprocessing AsyncResult AsyncResult.get() will return the result ''' if not hasattr(self, 'ramachandran_score_class'): - self.ramachandran_score_class = Parallel_Ramachandran_Score(self.mol, self.processes) #Parallel_Ramachandran_Score(self.mol) + self.ramachandran_score_class = Parallel_Ramachandran_Score(self.mol, self.processes) assert len(frame.shape) == 2, f'We wanted 2D data but got {len(frame.shape)} dimensions' if frame.shape[0] == 3: - f = frame.permute(1,0) + f = frame.permute(1, 0) else: assert frame.shape[1] == 3 f = frame @@ -374,9 +425,8 @@

Source code for molearn.analysis.analyser

             f = f.data.cpu().numpy()
         
         return self.ramachandran_score_class.get_score(f*self.stdval)
-        #nf, na, no, nt = self.ramachandran_score_class.get_score(f*self.stdval)
-        #return {'favored':nf, 'allowed':na, 'outliers':no, 'total':nt}
-
+        # nf, na, no, nt = self.ramachandran_score_class.get_score(f*self.stdval)
+        # return {'favored':nf, 'allowed':na, 'outliers':no, 'total':nt}
 
     def _dope_score(self, frame, refine=True, **kwargs):
         '''
@@ -388,16 +438,18 @@ 

Source code for molearn.analysis.analyser

 
         assert len(frame.shape) == 2, f'We wanted 2D data but got {len(frame.shape)} dimensions'
         if frame.shape[0] == 3:
-            f = frame.permute(1,0)
+            f = frame.permute(1, 0)
         else:
-            assert frame.shape[1] ==3
+            assert frame.shape[1] == 3
             f = frame
         if isinstance(f,torch.Tensor):
             f = f.data.cpu().numpy()
 
         return self.dope_score_class.get_score(f*self.stdval, refine=refine, **kwargs)
 
-
[docs] def get_all_ramachandran_score(self, tensor): +
+[docs] + def get_all_ramachandran_score(self, tensor): ''' Calculate Ramachandran score of an ensemble of atomic conrdinates. @@ -407,7 +459,7 @@

Source code for molearn.analysis.analyser

         results = []
         for f in tensor:
             results.append(self._ramachandran_score(f))
-        for r in tqdm(results,desc=f'Calc rama'):
+        for r in tqdm(results,desc='Calc rama'):
             favored, allowed, outliers, total = r.get()
             rama['favored'].append(favored)
             rama['allowed'].append(allowed)
@@ -415,7 +467,10 @@ 

Source code for molearn.analysis.analyser

             rama['total'].append(total)
         return {key:np.array(value) for key, value in rama.items()}       
-
[docs] def get_all_dope_score(self, tensor, refine=True): + +
+[docs] + def get_all_dope_score(self, tensor, refine=True): ''' Calculate DOPE score of an ensemble of atom coordinates. @@ -425,16 +480,19 @@

Source code for molearn.analysis.analyser

         results = []
         for f in tensor:
             results.append(self._dope_score(f, refine=refine))
-        results = np.array([r.get() for r in tqdm(results, desc=f'Calc Dope')])
+        results = np.array([r.get() for r in tqdm(results, desc='Calc Dope')])
         return results
-
[docs] def reference_dope_score(self, frame): + +
+[docs] + def reference_dope_score(self, frame): ''' :param numpy.array frame: array with shape [1, N, 3] with Cartesian coordinates of atoms :return: DOPE score ''' self.mol.coordinates = deepcopy(frame) - self.mol.write_pdb('tmp.pdb', split_struc = False) + self.mol.write_pdb('tmp.pdb', split_struc=False) env = Environ() env.libs.topology.read(file='$(LIB)/top_heav.lib') env.libs.parameters.read(file='$(LIB)/par.lib') @@ -443,7 +501,10 @@

Source code for molearn.analysis.analyser

         score = atmsel.assess_dope()
         return score
-
[docs] def scan_dope(self, key=None, refine=True, **kwargs): + +
+[docs] + def scan_dope(self, key=None, refine=True, **kwargs): ''' Calculate DOPE score on a grid sampling the latent space. Requires a grid system to be defined via a prior call to :func:`set_dataset <molearn.analysis.MolearnAnalysis.setup_grid>`. @@ -468,13 +529,16 @@

Source code for molearn.analysis.analyser

             decoded = self.get_decoded('grid')
             result = self.get_all_dope_score(decoded, refine=refine, **kwargs)
             if refine=='both':
-                self.surfaces[key] = as_numpy(result.reshape(self.n_samples, self.n_samples,2))
+                self.surfaces[key] = as_numpy(result.reshape(self.n_samples, self.n_samples, 2))
             else:
                 self.surfaces[key] = as_numpy(result.reshape(self.n_samples, self.n_samples))
             
         return self.surfaces[key], self.xvals, self.yvals
-
[docs] def scan_ramachandran(self): + +
+[docs] + def scan_ramachandran(self): ''' Calculate Ramachandran scores on a grid sampling the latent space. Requires a grid system to be defined via a prior call to :func:`set_dataset <molearn.analysis.MolearnAnalysis.setup_grid>`. @@ -493,8 +557,11 @@

Source code for molearn.analysis.analyser

                 self.surfaces[keys[key]] = value
 
         return self.surfaces['Ramachandran_favored'], self.xvals, self.yvals
+ -
[docs] def scan_custom(self, fct, params, key): +
+[docs] + def scan_custom(self, fct, params, key): ''' Generate a surface coloured as a function of a user-defined function. @@ -507,14 +574,17 @@

Source code for molearn.analysis.analyser

         '''
         decoded = self.get_decoded('grid')
         results = []
-        for i,j in enumerate(decoded):
-            s = (j.view(1,3,-1).permute(0,2,1)*self.stdval).numpy()
+        for i, j in enumerate(decoded):
+            s = (j.view(1, 3, -1).permute(0, 2, 1)*self.stdval).numpy()
             results.append(fct(s, *params))
         self.surfaces[key] = np.array(results).reshape(self.n_samples, self.n_samples)
         
         return self.surfaces[key], self.xvals, self.yvals
-
[docs] def generate(self, crd): + +
+[docs] + def generate(self, crd): ''' Generate a collection of protein conformations, given coordinates in the latent space. @@ -569,8 +639,8 @@

Navigation

\ No newline at end of file diff --git a/docs/build/_modules/molearn/analysis/path.html b/docs/build/_modules/molearn/analysis/path.html index f1342c8..2516674 100644 --- a/docs/build/_modules/molearn/analysis/path.html +++ b/docs/build/_modules/molearn/analysis/path.html @@ -1,18 +1,15 @@ - - + molearn.analysis.path — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -45,7 +42,8 @@

Source code for molearn.analysis.path

    :synopsis: Tools for linking waypoints with paths in latent space
 """
 
-class PriorityQueue(object):
+
+class PriorityQueue:
     '''
     Queue for shortest path algorithms.
     
@@ -112,7 +110,7 @@ 

Source code for molearn.analysis.path

             idx = np.unravel_index(idx, gridshape)
         elif len(idx) != 2:
             raise Exception("Expecting 2D coordinates")
-    except:
+    except Exception:
         raise Exception("idx should be either integer or an iterable")
 
     # generate neighbour list
@@ -142,6 +140,7 @@ 

Source code for molearn.analysis.path

     '''
     :return: scalar value, reporting on the cost of moving onto a grid cell
     '''
+    
     # separate function for clarity, and in case in the future we want to alter this
     return graph[pt]
     
@@ -192,7 +191,9 @@ 

Source code for molearn.analysis.path

     return came_from, cost_so_far
 
 
-
[docs]def get_path(idx_start, idx_end, landscape, xvals, yvals, smooth=3): +
+[docs] +def get_path(idx_start, idx_end, landscape, xvals, yvals, smooth=3): ''' Find shortest path between two points on a weighted grid @@ -216,7 +217,9 @@

Source code for molearn.analysis.path

     coords = []
     score = []
     idx_flat = np.ravel_multi_index(idx_end, landscape.shape)
-    while cnt<1000: #safeguad for (unlikely) unfinished paths
+    
+    # safeguard for (unlikely) unfinished paths
+    while cnt<1000:
 
         if idx_flat == mypath[idx_flat]:
             break
@@ -258,7 +261,9 @@ 

Source code for molearn.analysis.path

     return np.array([my_x, my_y])
 
 
-
[docs]def get_path_aggregate(crd, landscape, xvals, yvals, input_is_index=False): +
+[docs] +def get_path_aggregate(crd, landscape, xvals, yvals, input_is_index=False): ''' Create a chain of shortest paths via give waypoints @@ -290,7 +295,10 @@

Source code for molearn.analysis.path

     return crd
-
[docs]def oversample(crd, pts=10): + +
+[docs] +def oversample(crd, pts=10): ''' Add extra equally spaced points between a list of points. @@ -298,6 +306,7 @@

Source code for molearn.analysis.path

     :param int pts: number of extra points to add in each interval
     :return: Mx2 numpy array, with M>=N.
     ''' 
+    
     pts += 1
     steps = np.linspace(1./pts, 1, pts)
     pts = [crd[0]]
@@ -307,6 +316,7 @@ 

Source code for molearn.analysis.path

             pts.append(newpt)
 
     return np.array(pts)
+
@@ -344,8 +354,8 @@

Navigation

\ No newline at end of file diff --git a/docs/build/_modules/molearn/data/pdb_data.html b/docs/build/_modules/molearn/data/pdb_data.html index 248a579..1e1a70d 100644 --- a/docs/build/_modules/molearn/data/pdb_data.html +++ b/docs/build/_modules/molearn/data/pdb_data.html @@ -1,18 +1,15 @@ - - + molearn.data.pdb_data — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -42,9 +39,12 @@

Source code for molearn.data.pdb_data

 from copy import deepcopy
 import biobox as bb
 
-
[docs]class PDBData: + +
+[docs] +class PDBData: - def __init__(self, filename = None, fix_terminal = False, atoms = None, ): + def __init__(self, filename=None, fix_terminal=False, atoms=None): ''' Create object enabling the manipulation of multi-PDB files into a dataset suitable for training. @@ -61,9 +61,11 @@

Source code for molearn.data.pdb_data

         if fix_terminal:
             self.fix_terminal()
         if atoms is not None:
-            self.atomselect(atoms = atoms)
+            self.atomselect(atoms=atoms)
 
-
[docs] def import_pdb(self, filename): +
+[docs] + def import_pdb(self, filename): ''' Load multiPDB file. This command can be called multiple times to load many datasets, if these feature the same number of atoms @@ -77,7 +79,10 @@

Source code for molearn.data.pdb_data

             self.filename = []
         self.filename.append(filename)
-
[docs] def fix_terminal(self): + +
+[docs] + def fix_terminal(self): ''' Rename OT1 N-terminal Oxygen to O if terminal oxygens are named OT1 and OT2 otherwise no oxygen will be selected during an atomselect using atoms = ['CA', 'C','N','O','CB']. No template will be found for terminal residue in openmm_loss. Alternative solution is to use atoms = ['CA', 'C', 'N', 'O', 'CB', 'OT1']. instead. ''' @@ -86,7 +91,10 @@

Source code for molearn.data.pdb_data

         if len(ot1)!=0 and len(ot2)!=0:
             self._mol.data.loc[ot1,'name']='O'
-
[docs] def atomselect(self, atoms, ignore_atoms=[]): + +
+[docs] + def atomselect(self, atoms, ignore_atoms=[]): ''' From all imported PDBs, extract only atoms of interest. :func:`import_pdb <molearn.data.PDBData.import_pdb>` must have been called at least once, either at class instantiation or as a separate call. @@ -99,15 +107,19 @@

Source code for molearn.data.pdb_data

                 if to_remove in _atoms:
                     _atoms.remove(to_remove)
         elif atoms == "no_hydrogen":
-            _atoms = self.atoms #list(np.unique(self._mol.data["name"].values))    #all the atoms
+            _atoms = self.atoms  # list(np.unique(self._mol.data["name"].values))    #all the atoms
             _plain_atoms = []
             for a in _atoms:
                 if a in self._mol.knowledge['atomtype']:
                     _plain_atoms.append(self._mol.knowledge['atomtype'][a])
                 elif a[:-1] in self._mol.knowledge['atomtype']:
                     _plain_atoms.append(self._mol.knowledge['atomtype'][a[:-1]])
+                    print(f'Could not find {a}. I am assuing you meant {a[:-1]} instead.')
+                elif a[:-2] in self._mol.knowledge['atomtype']:
+                    _plain_atoms.append(self._mol.knowledge['atomtype'][a[:-2]])
+                    print(f'Could not find {a}. I am assuming you meant {a[:-2]} instead.')
                 else:
-                    _plain_atoms.append(self._mol.knowledge['atomtype'][a]) # if above failed just raise the keyerror
+                    _plain_atoms.append(self._mol.knowledge['atomtype'][a])  # if above failed just raise the keyerror
             _atoms = [atom for atom, element in zip(_atoms, _plain_atoms) if element != 'H']
         else:
             _atoms = [_a for _a in atoms if _a not in ignore_atoms]
@@ -115,7 +127,10 @@ 

Source code for molearn.data.pdb_data

         _, self._idxs = self._mol.atomselect("*", "*", _atoms, get_index=True)
         self._mol = self._mol.get_subset(self._idxs)
-
[docs] def prepare_dataset(self): + +
+[docs] + def prepare_dataset(self): ''' Once all datasets have been loaded, normalise data and convert into `torch.Tensor` (ready for training) ''' @@ -134,7 +149,10 @@

Source code for molearn.data.pdb_data

         print(f'Dataset.shape: {self.dataset.shape}')
         print(f'mean: {str(self.mean)}, std: {str(self.std)}')
-
[docs] def get_atominfo(self): + +
+[docs] + def get_atominfo(self): ''' generate list of all atoms in dataset, where every line contains [atom name, residue name, resid] ''' @@ -143,7 +161,10 @@

Source code for molearn.data.pdb_data

             self.atominfo = self._mol.get_data(columns=['name', 'resname', 'resid'])
         return self.atominfo
-
[docs] def frame(self): + +
+[docs] + def frame(self): ''' return `biobox.Molecule` object with loaded data ''' @@ -156,7 +177,10 @@

Source code for molearn.data.pdb_data

         M.properties['center'] = M.get_center()
         return deepcopy(M)
-
[docs] def get_dataloader(self, batch_size, validation_split=0.1, pin_memory=True, dataset_sample_size=-1, manual_seed=None, shuffle=True, sampler=None): + +
+[docs] + def get_dataloader(self, batch_size, validation_split=0.1, pin_memory=True, dataset_sample_size=-1, manual_seed=None, shuffle=True, sampler=None): ''' :param batch_size: :param validation_split: @@ -182,8 +206,11 @@

Source code for molearn.data.pdb_data

             self.train_dataloader = torch.utils.data.DataLoader(self.train_dataset, batch_size=batch_size, pin_memory=pin_memory, shuffle=True)
         self.valid_dataloader = torch.utils.data.DataLoader(self.valid_dataset, batch_size=batch_size, pin_memory=pin_memory,shuffle=True)
         return self.train_dataloader, self.valid_dataloader
+ -
[docs] def split(self, *args, **kwargs): +
+[docs] + def split(self, *args, **kwargs): ''' Split :func:`PDBData <molearn.data.PDBData>` into two other :func:`PDBData <molearn.data.PDBData>` objects corresponding to train and valid sets. @@ -194,7 +221,7 @@

Source code for molearn.data.pdb_data

         :return: :func:`PDBData <molearn.data.PDBData>` object corresponding to train set
         :return: :func:`PDBData <molearn.data.PDBData>` object corresponding to validation set
         '''
-        #validation_split=0.1, valid_size=None, train_size=None, manual_seed = None):
+        # validation_split=0.1, valid_size=None, train_size=None, manual_seed = None):
         train_dataset, valid_dataset = self.get_datasets(*args, **kwargs)
         train = PDBData()
         valid = PDBData()
@@ -205,7 +232,10 @@ 

Source code for molearn.data.pdb_data

         valid.dataset = valid_dataset
         return train, valid
-
[docs] def get_datasets(self, validation_split=0.1, valid_size=None, train_size=None, manual_seed = None): + +
+[docs] + def get_datasets(self, validation_split=0.1, valid_size=None, train_size=None, manual_seed=None): ''' Create a training and validation set from the imported data @@ -229,7 +259,7 @@

Source code for molearn.data.pdb_data

                 _valid_size = valid_size
         from torch import randperm
         if manual_seed is not None:
-            indices = randperm(len(self.dataset), generator = torch.Generator().manual_seed(manual_seed))
+            indices = randperm(len(self.dataset), generator=torch.Generator().manual_seed(manual_seed))
         else:
             indices = randperm(len(self.dataset))
 
@@ -238,16 +268,14 @@ 

Source code for molearn.data.pdb_data

         valid_dataset = dataset[indices[_train_size:_train_size+_valid_size]]
         return train_dataset, valid_dataset
+ @property def atoms(self): - return list(np.unique(self._mol.data["name"].values)) #all the atoms + return list(np.unique(self._mol.data["name"].values)) # all the atoms @property def mol(self): return self.frame()
- - -
@@ -286,8 +314,8 @@

Navigation

\ No newline at end of file diff --git a/docs/build/_modules/molearn/loss_functions/openmm_thread.html b/docs/build/_modules/molearn/loss_functions/openmm_thread.html index ddff98f..ac7c462 100644 --- a/docs/build/_modules/molearn/loss_functions/openmm_thread.html +++ b/docs/build/_modules/molearn/loss_functions/openmm_thread.html @@ -1,18 +1,15 @@ - - + molearn.loss_functions.openmm_thread — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -53,11 +50,14 @@

Source code for molearn.loss_functions.openmm_thread

import torch import numpy as np +from copy import deepcopy -
[docs]class ModifiedForceField(ForceField): +
+[docs] +class ModifiedForceField(ForceField): - def __init__(self, *args, alternative_residue_names = None, **kwargs): + def __init__(self, *args, alternative_residue_names=None, **kwargs): ''' Takes all `*args` and `**kwargs` of `openmm.app.ForceField`, plus an optional parameter described here. @@ -119,7 +119,7 @@

Source code for molearn.loss_functions.openmm_thread

matches = m return [template, matches] print(f'multiple for {t.name}') - # We found multiple matches. This is OK if and only if they assign identical types and parameters to all atoms. + # We found multiple matches. This is OK if and only if they assign identical types and parameters to all atoms. t1, m1 = allMatches[0] for t2, m2 in allMatches[1:]: @@ -129,15 +129,19 @@

Source code for molearn.loss_functions.openmm_thread

matches = allMatches[0][1] return [template, matches]
-
[docs]class OpenmmPluginScore(): + + +
+[docs] +class OpenmmPluginScore(): ''' This will use the new OpenMM Plugin to calculate forces and energy. The intention is that this will be fast enough to be able to calculate forces and energy during training. N.B.: The current torchintegratorplugin only supports float on GPU and double on CPU. ''' - def __init__(self, mol=None, xml_file = ['amber14-all.xml'], platform = 'CUDA', remove_NB=False, - alternative_residue_names = dict(HIS='HIE', HSE='HIE'), atoms=['CA', 'C', 'N', 'CB','O'], - soft=False): + def __init__(self, mol=None, xml_file=['amber14-all.xml'], platform='CUDA', remove_NB=False, + alternative_residue_names=dict(HIS='HIE', HSE='HIE'), atoms=['CA', 'C', 'N', 'CB','O'], + soft=False): ''' :param `biobox.Molecule` mol: if pldataloader is not given, then a biobox object will be taken from this parameter. If neither are given then an error will be thrown. :param str xml_file: xml parameter file @@ -149,12 +153,12 @@

Source code for molearn.loss_functions.openmm_thread

''' self.mol = mol for key, value in alternative_residue_names.items(): - #self.mol.data.loc[:,'resname'][self.mol.data['resname']==key]=value + # self.mol.data.loc[:,'resname'][self.mol.data['resname']==key]=value self.mol.data.loc[self.mol.data['resname']==key,'resname']=value - #self.mol.data.loc[lambda df: df['resname']==key, key]=value + # self.mol.data.loc[lambda df: df['resname']==key, key]=value tmp_file = f'tmp{np.random.randint(1e10)}.pdb' self.atoms = atoms - self.mol.write_pdb(tmp_file, split_struc = False) + self.mol.write_pdb(tmp_file, split_struc=False) self.pdb = PDBFile(tmp_file) if soft: print('attempting soft forcefield') @@ -164,9 +168,9 @@

Source code for molearn.loss_functions.openmm_thread

self.system = self.forcefield.createSystem(self.pdb.topology) else: if isinstance(xml_file,str): - self.forcefield = ModifiedForceField(xml_file, alternative_residue_names = alternative_residue_names) + self.forcefield = ModifiedForceField(xml_file, alternative_residue_names=alternative_residue_names) elif len(xml_file)>0: - self.forcefield = ModifiedForceField(*xml_file, alternative_residue_names = alternative_residue_names) + self.forcefield = ModifiedForceField(*xml_file, alternative_residue_names=alternative_residue_names) else: raise ValueError(f'xml_file: {xml_file} needs to be a str or a list of str') @@ -174,14 +178,14 @@

Source code for molearn.loss_functions.openmm_thread

self.ignore_hydrogen() else: self.atomselect(atoms) - #save pdb and reload in modeller + # save pdb and reload in modeller templates, unique_unmatched_residues = self.forcefield.generateTemplatesForUnmatchedResidues(self.pdb.topology) self.system = self.forcefield.createSystem(self.pdb.topology) if remove_NB: forces = self.system.getForces() for idx in reversed(range(len(forces))): force = forces[idx] - if isinstance(force, (#openmm.PeriodicTorsionForce, + if isinstance(force, ( # openmm.PeriodicTorsionForce, openmm.CustomGBForce, openmm.NonbondedForce, openmm.CMMotionRemover)): @@ -193,7 +197,6 @@

Source code for molearn.loss_functions.openmm_thread

if isinstance(force, openmm.CustomGBForce): self.system.removeForce(idx) - self.integrator = TorchExposedIntegrator() self.platform = Platform.getPlatformByName(platform) self.simulation = Simulation(self.pdb.topology, self.system, self.integrator, self.platform) @@ -206,7 +209,7 @@

Source code for molearn.loss_functions.openmm_thread

os.remove(tmp_file) def ignore_hydrogen(self): - #ignore = ['ASH', 'LYN', 'GLH', 'HID', 'HIP', 'CYM', ] + # ignore = ['ASH', 'LYN', 'GLH', 'HID', 'HIP', 'CYM', ] ignore = [] for name, template in self.forcefield._templates.items(): if name in ignore: @@ -232,6 +235,12 @@

Source code for molearn.loss_functions.openmm_thread

self.forcefield.registerPatch(patchData) def atomselect(self, atoms): + atoms = deepcopy(atoms) + if 'OT2' in atoms: + atoms.append('OXT') + if 'OT1' in atoms: + atoms.append('OXT') + for name, template in self.forcefield._templates.items(): patchData = ForceField._PatchData(name+'_leave_only_'+'_'.join(atoms), 1) @@ -254,8 +263,9 @@

Source code for molearn.loss_functions.openmm_thread

self.forcefield.registerTemplatePatch(name, name+'_leave_only_'+'_'.join(atoms), 0) self.forcefield.registerPatch(patchData) - -
[docs] def get_energy(self, pos_ptr, force_ptr, energy_ptr, n_particles, batch_size): +
+[docs] + def get_energy(self, pos_ptr, force_ptr, energy_ptr, n_particles, batch_size): ''' :param pos_ptr: tensor.data_ptr() :param force_ptr: tensor.data_ptr() @@ -268,17 +278,25 @@

Source code for molearn.loss_functions.openmm_thread

self.integrator.torchMultiStructureE(pos_ptr, force_ptr, energy_ptr, n_particles, batch_size) return True
-
[docs] def execute(self, x): + +
+[docs] + def execute(self, x): ''' :param `torch.Tensor` x: shape [b, N, 3]. dtype=float. device = gpu ''' force = torch.zeros_like(x) - energy = torch.zeros(x.shape[0], device = torch.device('cpu'), dtype=torch.double) + energy = torch.zeros(x.shape[0], device=torch.device('cpu'), dtype=torch.double) self.get_energy(x.data_ptr(), force.data_ptr(), energy.data_ptr(), x.shape[1], x.shape[0]) - return force, energy
+ return force, energy
+
+ -
[docs]class OpenmmTorchEnergyMinimizer(OpenmmPluginScore): +
+[docs] +class OpenmmTorchEnergyMinimizer(OpenmmPluginScore): + def minimize(self, x, maxIterations=10, threshold=10000): minimized_x = torch.empty_like(x) for i,s in enumerate(x.unsqueeze(1)): @@ -301,12 +319,15 @@

Source code for molearn.loss_functions.openmm_thread

[docs]class OpenMMPluginScoreSoftForceField(OpenmmPluginScore): +
+[docs] +class OpenMMPluginScoreSoftForceField(OpenmmPluginScore): + def __init__(self, mol=None, platform='CUDA', atoms=['CA','C','N','CB','O']): self.mol = mol tmp_file = 'tmp.pdb' self.atoms = atoms - self.mol.write_pdb(tmp_file, split_struc = False) + self.mol.write_pdb(tmp_file, split_struc=False) self.pdb = PDBFile(tmp_file) from pdbfixer import PDBFixer f = PDBFixer(tmp_file) @@ -323,9 +344,14 @@

Source code for molearn.loss_functions.openmm_thread

print(self.simulation.context.getState(getEnergy=True).getPotentialEnergy()._value)
-
[docs]class openmm_energy_function(torch.autograd.Function): -
[docs] @staticmethod +
+[docs] +class openmm_energy_function(torch.autograd.Function): + +
+[docs] + @staticmethod def forward(ctx, plugin, x): ''' :param plugin: OpenmmPluginScore instance @@ -343,22 +369,32 @@

Source code for molearn.loss_functions.openmm_thread

force = torch.tensor(force).float() energy = torch.tensor(energy).float() else: - #torch.cuda.synchronize(x.device) + # torch.cuda.synchronize(x.device) force, energy = plugin.execute(x) - #torch.cuda.synchronize(x.device) + # torch.cuda.synchronize(x.device) ctx.save_for_backward(force) energy = energy.float().to(x.device) return energy
-
[docs] @staticmethod + +
+[docs] + @staticmethod def backward(ctx, grad_output): - force = ctx.saved_tensors[0] # force shape [B, N, 3] - #embed(header='23 openmm_loss_function') - return None, -force*grad_output.view(-1,1,1)
+ force = ctx.saved_tensors[0] # force shape [B, N, 3] + # embed(header='23 openmm_loss_function') + return None, -force*grad_output.view(-1,1,1)
+
+ + -
[docs]class openmm_clamped_energy_function(torch.autograd.Function): +
+[docs] +class openmm_clamped_energy_function(torch.autograd.Function): -
[docs] @staticmethod +
+[docs] + @staticmethod def forward(ctx, plugin, x, clamp): ''' :param plugin: OpenmmPluginScore instance @@ -383,13 +419,22 @@

Source code for molearn.loss_functions.openmm_thread

energy = energy.float().to(x.device) return energy
-
[docs] @staticmethod + +
+[docs] + @staticmethod def backward(ctx, grad_output): force = ctx.saved_tensors[0] - return None, -force*grad_output.view(-1,1,1), None
+ return None, -force*grad_output.view(-1, 1, 1), None
+
+ -
[docs]class openmm_energy(torch.nn.Module): - def __init__(self, mol, std, clamp = None, **kwargs): + +
+[docs] +class openmm_energy(torch.nn.Module): + + def __init__(self, mol, std, clamp=None, **kwargs): super().__init__() self.openmmplugin = OpenmmPluginScore(mol, **kwargs) self.std = std/10 @@ -404,7 +449,7 @@

Source code for molearn.loss_functions.openmm_thread

:param `torch.Tensor` x: dtype=torch.float, device=CUDA, shape B, 3, N :returns: torch energy tensor dtype should be float and on same device as x ''' - _x = (x*self.std).permute(0,2,1).contiguous() + _x = (x*self.std).permute(0, 2, 1).contiguous() energy = openmm_energy_function.apply(self.openmmplugin, _x) return energy @@ -413,7 +458,7 @@

Source code for molearn.loss_functions.openmm_thread

:param `torch.Tensor` x: dtype=torch.float, device=CUDA, shape B, 3, N :returns: torch energy tensor dtype should be float and on same device as x ''' - _x = (x*self.std).permute(0,2,1).contiguous() + _x = (x*self.std).permute(0, 2, 1).contiguous() energy = openmm_clamped_energy_function.apply(self.openmmplugin, _x, self.clamp) return energy
@@ -454,8 +499,8 @@

Navigation

\ No newline at end of file diff --git a/docs/build/_modules/molearn/models/CNN_autoencoder.html b/docs/build/_modules/molearn/models/CNN_autoencoder.html index d182e00..bd8b707 100644 --- a/docs/build/_modules/molearn/models/CNN_autoencoder.html +++ b/docs/build/_modules/molearn/models/CNN_autoencoder.html @@ -1,18 +1,15 @@ - - + molearn.models.CNN_autoencoder — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -55,27 +52,31 @@

Source code for molearn.models.CNN_autoencoder

def __init__(self, f): super(ResidualBlock, self).__init__() - conv_block = [ nn.Conv1d(f,f, 3, stride=1, padding=1, bias=False), - nn.BatchNorm1d(f), - nn.ReLU(inplace=True), - nn.Conv1d(f,f, 3, stride=1, padding=1, bias=False), - nn.BatchNorm1d(f) ] + conv_block = [nn.Conv1d(f, f, 3, stride=1, padding=1, bias=False), + nn.BatchNorm1d(f), + nn.ReLU(inplace=True), + nn.Conv1d(f, f, 3, stride=1, padding=1, bias=False), + nn.BatchNorm1d(f)] self.conv_block = nn.Sequential(*conv_block) def forward(self, x): return x + self.conv_block(x) - #return torch.relu(x + self.conv_block(x)) #earlier runs were with 'return x + self.conv_block(x)' but not an issue (really?) + # return torch.relu(x + self.conv_block(x)) #earlier runs were with 'return x + self.conv_block(x)' but not an issue (really?) + class To2D(nn.Module): + def __init__(self): super(To2D, self).__init__() pass + def forward(self, x): - z = torch.nn.functional.adaptive_avg_pool2d(x, output_size=(2,1)) + z = torch.nn.functional.adaptive_avg_pool2d(x, output_size=(2, 1)) z = torch.sigmoid(z) return z + class From2D(nn.Module): def __init__(self): super(From2D, self).__init__() @@ -88,9 +89,9 @@

Source code for molearn.models.CNN_autoencoder

return x - - -

[docs]class Autoencoder(nn.Module): +
+[docs] +class Autoencoder(nn.Module): ''' This is the autoencoder used in our `Ramaswamy 2021 paper <https://journals.aps.org/prx/abstract/10.1103/PhysRevX.11.011052>`_. It is largely superseded by :func:`molearn.models.foldingnet.AutoEncoder`. @@ -152,6 +153,7 @@

Source code for molearn.models.CNN_autoencoder

for m in self.decoder: x = m(x) return x

+
@@ -189,8 +191,8 @@

Navigation

\ No newline at end of file diff --git a/docs/build/_modules/molearn/models/foldingnet.html b/docs/build/_modules/molearn/models/foldingnet.html index 10ce2f5..9a380a6 100644 --- a/docs/build/_modules/molearn/models/foldingnet.html +++ b/docs/build/_modules/molearn/models/foldingnet.html @@ -1,18 +1,15 @@ - - + molearn.models.foldingnet — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -38,10 +35,10 @@

Navigation

Source code for molearn.models.foldingnet

 import torch
-import biobox as bb
 from torch import nn
 import torch.nn.functional as F
 
+
 def index_points(point_clouds, index):
     '''
     Given a batch of tensor and index, select sub-tensor.
@@ -73,7 +70,7 @@ 

Source code for molearn.models.foldingnet

     xx = torch.sum(x ** 2, dim=1, keepdim=True)  # (B, 1, N)
     pairwise_distance = -xx - inner - xx.transpose(2, 1)  # (B, 1, N), (B, N, N), (B, N, 1) -> (B, N, N)
 
-    idx = pairwise_distance.topk(k=k, dim=-1)[1]   # (B, N, k)
+    idx = pairwise_distance.topk(k=k, dim=-1)[1]  # (B, N, k)
     return idx
 
 
@@ -109,7 +106,7 @@ 

Source code for molearn.models.foldingnet

     '''
     Graph based encoder
     '''
-    def __init__(self, latent_dimension = 2,**kwargs):
+    def __init__(self, latent_dimension=2, **kwargs):
         super(Encoder, self).__init__()
         self.latent_dimension = latent_dimension
         self.conv1 = nn.Conv1d(12, 64, 1)
@@ -143,7 +140,6 @@ 

Source code for molearn.models.foldingnet

         x = F.relu(self.bn2(self.conv2(x)))
         x = F.relu(self.bn3(self.conv3(x)))
 
-
         # two consecutive graph layers
         x = self.graph_layer1(x)
         x = self.graph_layer2(x)
@@ -181,9 +177,9 @@ 

Source code for molearn.models.foldingnet

         :param grids: reshaped 2D grids or intermediam reconstructed point clouds
         """
         # concatenate
-        #try:
+        # try:
         #    x = torch.cat([*args], dim=1)
-        #except:
+        # except:
         #    for arg in args:
         #        print(arg.shape)
         #    raise
@@ -193,6 +189,7 @@ 

Source code for molearn.models.foldingnet

 
         return x
 
+
 class Decoder_Layer(nn.Module):
     '''
     Decoder Module of FoldingNet
@@ -202,14 +199,14 @@ 

Source code for molearn.models.foldingnet

         super(Decoder_Layer, self).__init__()
 
         # Sample the grids in 2D space
-        #xx = np.linspace(-0.3, 0.3, 45, dtype=np.float32)
-        #yy = np.linspace(-0.3, 0.3, 45, dtype=np.float32)
-        #self.grid = np.meshgrid(xx, yy)   # (2, 45, 45)
+        # xx = np.linspace(-0.3, 0.3, 45, dtype=np.float32)
+        # yy = np.linspace(-0.3, 0.3, 45, dtype=np.float32)
+        # self.grid = np.meshgrid(xx, yy)   # (2, 45, 45)
         self.out_points = out_points
         self.grid = torch.linspace(-0.5, 0.5, out_points).view(1,-1)
         # reshape
-        #self.grid = torch.Tensor(self.grid).view(2, -1)  # (2, 45, 45) -> (2, 45 * 45)
-        assert out_points%in_points==0
+        # self.grid = torch.Tensor(self.grid).view(2, -1)  # (2, 45, 45) -> (2, 45 * 45)
+        assert out_points % in_points == 0
         self.m = out_points//in_points
 
         self.fold1 = FoldingLayer(in_channel + 1, [512, 512, out_channel])
@@ -234,6 +231,7 @@ 

Source code for molearn.models.foldingnet

 
         return recon2
 
+
 class Decoder(nn.Module):
     '''
     Decoder Module of FoldingNet
@@ -244,14 +242,12 @@ 

Source code for molearn.models.foldingnet

         self.latent_dimension = latent_dimension
 
         # Sample the grids in 2D space
-        #xx = np.linspace(-0.3, 0.3, 45, dtype=np.float32)
-        #yy = np.linspace(-0.3, 0.3, 45, dtype=np.float32)
-        #self.grid = np.meshgrid(xx, yy)   # (2, 45, 45)
+        # xx = np.linspace(-0.3, 0.3, 45, dtype=np.float32)
+        # yy = np.linspace(-0.3, 0.3, 45, dtype=np.float32)
+        # self.grid = np.meshgrid(xx, yy)   # (2, 45, 45)
 
-        
         start_out = (out_points//128) +1
 
-
         self.out_points = out_points
 
         self.layer1 = Decoder_Layer(1,           start_out,    latent_dimension,3*128)
@@ -272,7 +268,9 @@ 

Source code for molearn.models.foldingnet

         return x
 
 
-
[docs]class AutoEncoder(nn.Module): +
+[docs] +class AutoEncoder(nn.Module): ''' Autoencoder architecture derived from FoldingNet. ''' @@ -289,10 +287,14 @@

Source code for molearn.models.foldingnet

     def decode(self, x):
         return self.decoder(x)
 
-
[docs] def forward(self, x): +
+[docs] + def forward(self, x): x = self.encoder(x) x = self.decoder(x) - return x
+ return x
+
+ if __name__=='__main__': @@ -334,8 +336,8 @@

Navigation

\ No newline at end of file diff --git a/docs/build/_modules/molearn/scoring/dope_score.html b/docs/build/_modules/molearn/scoring/dope_score.html index 0725482..7c51821 100644 --- a/docs/build/_modules/molearn/scoring/dope_score.html +++ b/docs/build/_modules/molearn/scoring/dope_score.html @@ -1,18 +1,15 @@ - - + molearn.scoring.dope_score — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -40,7 +37,7 @@

Source code for molearn.scoring.dope_score

 import numpy as np
 from copy import deepcopy
 
-from ..utils import ShutUp, cpu_count, random_string
+from ..utils import ShutUp, random_string
 try:
     import modeller
     from modeller import *
@@ -49,22 +46,25 @@ 

Source code for molearn.scoring.dope_score

 except Exception as e:
     print('Error importing modeller: ')
     print(e)
-    
 
-from multiprocessing import Pool, Event, get_context
+from multiprocessing import get_context
 import os
     
-
[docs]class DOPE_Score: + +
+[docs] +class DOPE_Score: ''' This class contains methods to calculate dope without saving to save and load PDB files for every structure. Atoms in a biobox coordinate tensor are mapped to the coordinates in the modeller model directly. ''' + atom_map = {('ILE', 'CD1'):('ILE', 'CD')} def __init__(self, mol): ''' :param biobox.Molecule mol: One example frame to gain access to the topology. Mol will also be used to save a temporary pdb file that will be reloaded in modeller to create the initial modeller Model. ''' - #set residues names with protonated histidines back to generic HIS name (needed by DOPE score function) + # set residues names with protonated histidines back to generic HIS name (needed by DOPE score function) testH = mol.data["resname"].values testH[testH == "HIE"] = "HIS" testH[testH == "HID"] = "HIS" @@ -73,10 +73,9 @@

Source code for molearn.scoring.dope_score

 
         alternate_residue_names = dict(CSS=('CYX',))
         atoms = ' '.join(list(_mol.data['name'].unique()))
-        #tmp_file = f'tmp{np.random.randint(1e10)}.pdb'
         tmp_file = f'tmp{random_string()}.pdb'
-        _mol.write_pdb(tmp_file, conformations=[0], split_struc = False)
-        log.level(0,0,0,0,0)
+        _mol.write_pdb(tmp_file, conformations=[0], split_struc=False)
+        log.level(0, 0, 0, 0, 0)
         env = environ()
         env.libs.topology.read(file='$(LIB)/top_heav.lib')
         env.libs.parameters.read(file='$(LIB)/par.lib')
@@ -86,7 +85,7 @@ 

Source code for molearn.scoring.dope_score

         atom_residue = _mol.get_data(columns=['name', 'resname', 'resid'])
         atom_order = []
         first_index = next(iter(self.fast_ss)).residue.index
-        offset = atom_residue[0,2]-first_index
+        offset = atom_residue[0, 2]-first_index
         for i, j in enumerate(self.fast_ss):
             if i < len(atom_residue):
                 for j_residue_name in alternate_residue_names.get(j.residue.name, (j.residue.name,)):
@@ -95,16 +94,26 @@ 

Source code for molearn.scoring.dope_score

                     else:
                         where_arg = (atom_residue==(np.array([j.name, j_residue_name, j.residue.index+offset], dtype=object))).all(axis=1)
                         where = np.where(where_arg)[0]
+                        if len(where)==0:
+                            if (j_residue_name, j.name) in self.atom_map:
+                                alt_residue_name, alt_name = self.atom_map[(j_residue_name, j.name)]
+                                where_arg = (atom_residue==(np.array([alt_name, alt_residue_name, j.residue.index+offset], dtype=object))).all(axis=1)
+                                where = np.where(where_arg)[0]
+                            else:
+                                print(f'Cant find {j.name} in the atoms {atom_residue[atom_residue[:,2]==j.residue.index+offset]} try adding a mapping to DOPE_Score.atom_map')
                         atom_order.append(int(where))
         self.fast_atom_order = atom_order
         # check fast dope atoms
-        for i,j in enumerate(self.fast_ss):
+        reverse_map = {value:key for key, value in self.atom_map.items()}
+        for i, j in enumerate(self.fast_ss):
             if i<len(atom_residue):
-                assert _mol.data['name'][atom_order[i]]==j.name
+                assert _mol.data['name'][atom_order[i]]==j.name or reverse_map[(_mol.data['resname'][atom_order[i]], _mol.data['name'][atom_order[i]])][1]==j.name
         self.cg = ConjugateGradients()
         os.remove(tmp_file)
 
-
[docs] def get_dope(self, frame, refine=False): +
+[docs] + def get_dope(self, frame, refine=False): ''' Get the dope score. Injects coordinates into modeller and uses `mdl.build(build_method='INTERNAL_COORDINATES', initialize_xyz=False)` to reconstruct missing atoms. If a error is thrown by modeller or at any stage, we just return a fixed large value of 1e10. @@ -114,6 +123,7 @@

Source code for molearn.scoring.dope_score

         :returns: Dope score as calculated by modeller. If error is thrown we just simply return 1e10.
         :rtype: float
         '''
+        
         # expect coords to be shape [N, 3] use .cpu().numpy().copy() before passing here and make sure it is scaled correctly
         try:
             frame = frame.astype(float)
@@ -135,10 +145,13 @@ 

Source code for molearn.scoring.dope_score

                 dope_score = self.fast_fs.assess_dope()
 
             return dope_score
-        except:
+        except Exception:
             return 1e10
+ -
[docs] def get_all_dope(self, coords, refine=False): +
+[docs] + def get_all_dope(self, coords, refine=False): ''' Expect a array of frames. return array of DOPE score value. @@ -147,6 +160,7 @@

Source code for molearn.scoring.dope_score

         :returns: float array shape [B]
         :rtype: np.ndarray
         '''
+        
         # expect coords to be shape [B, N, 3] use .cpu().numpy().copy() before passing here and make sure it is scaled correctly
         dope_scores = []
         for frame in coords:
@@ -162,23 +176,32 @@ 

Source code for molearn.scoring.dope_score

                 
             dope_scores.append(self.fast_fs.assess_dope())
             
-        return np.array(dope_scores)
+ return np.array(dope_scores)
+
+ + def set_global_score(score, kwargs): ''' Make score a global variable. This is used when initializing a multiprocessing process. ''' + global worker_dope_score - worker_dope_score = score(**kwargs)#mol = mol, data_dir=data_dir, **kwargs) + worker_dope_score = score(**kwargs) # mol = mol, data_dir=data_dir, **kwargs) + def process_dope(coords, kwargs): ''' Worker function for multiprocessing class ''' + return worker_dope_score.get_dope(coords,**kwargs) -
[docs]class Parallel_DOPE_Score(): + +
+[docs] +class Parallel_DOPE_Score: ''' a multiprocessing class to get modeller DOPE scores. A typical use case would looke like:: @@ -190,9 +213,9 @@

Source code for molearn.scoring.dope_score

       .... # DOPE will be calculated asynchronously in background
       #to retrieve the results
       results = np.array([r.get() for r in results])
-
     '''
-    def __init__(self, mol, processes=-1, context = 'spawn', **kwargs):
+    
+    def __init__(self, mol, processes=-1, context='spawn', **kwargs):
         '''
         :param biobox.Molecule mol: biobox molecule containing one example frame of the protein to be analysed. This will be passed to DOPE_Score class instances in each thread.
         :param int processes: (default: -1) Number of processes argument to pass to multiprocessing.pool. This controls the number of threads created.
@@ -201,28 +224,32 @@ 

Source code for molearn.scoring.dope_score

         
         # set a number of processes as user desires, capped on number of CPUs
         if processes > 0:
-            processes = min(processes, cpu_count())
+            processes = min(processes, os.cpu_count())
         else:
-            processes = cpu_count()
+            processes = os.cpu_count()
         self.processes = processes
         self.mol = deepcopy(mol)
         score = DOPE_Score
         ctx = get_context(context)
         self.pool = ctx.Pool(processes=processes, initializer=set_global_score,
                          initargs=(score, dict(mol=mol)),
-                         **kwargs,
-                         )
+                         **kwargs)
         self.process_function = process_dope
 
     def __reduce__(self):
         return (self.__class__, (self.mol, self.processes))
 
-
[docs] def get_score(self, coords, **kwargs): +
+[docs] + def get_score(self, coords, **kwargs): ''' :param np.array coords: # shape (N, 3) numpy array ''' - #is copy necessary? - return self.pool.apply_async(self.process_function, (coords.copy(), kwargs))
+ + # is copy necessary? + return self.pool.apply_async(self.process_function, (coords.copy(), kwargs))
+
+
@@ -260,8 +287,8 @@

Navigation

\ No newline at end of file diff --git a/docs/build/_modules/molearn/scoring/ramachandran_score.html b/docs/build/_modules/molearn/scoring/ramachandran_score.html index b480e8b..7271dff 100644 --- a/docs/build/_modules/molearn/scoring/ramachandran_score.html +++ b/docs/build/_modules/molearn/scoring/ramachandran_score.html @@ -1,18 +1,15 @@ - - + molearn.scoring.ramachandran_score — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -39,36 +36,41 @@

Navigation

Source code for molearn.scoring.ramachandran_score

 import numpy as np
 from copy import deepcopy
-from multiprocessing import Pool, Event, get_context
+from multiprocessing import get_context
 from scipy.spatial.distance import cdist
 
 from iotbx.data_manager import DataManager
 from mmtbx.validation.ramalyze import ramalyze
 from scitbx.array_family import flex
 
-from ..utils import cpu_count, random_string
+from ..utils import random_string
 import os
 
-
[docs]class Ramachandran_Score(): + +
+[docs] +class Ramachandran_Score: ''' This class contains methods that use iotbx/mmtbx to calulate the quality of phi and psi values in a protein. ''' + def __init__(self, mol, threshold=1e-3): ''' :param biobox.Molecule mol: One example frame to gain access to the topology. Mol will also be used to save a temporary pdb file that will be reloaded to create the initial iotbx Model. :param float threshold: (default: 1e-3) Threshold used to determine similarity between biobox.molecule coordinates and iotbx model coordinates. Determine that iotbx model was created successfully. ''' + tmp_file = f'rama_tmp{random_string()}.pdb' - mol.write_pdb(tmp_file, split_struc = False)#'rama_tmp.pdb') - filename = tmp_file#'rama_tmp.pdb' + mol.write_pdb(tmp_file, split_struc=False) + filename = tmp_file self.mol = mol - self.dm = DataManager(datatypes = ['model']) + self.dm = DataManager(datatypes=['model']) self.dm.process_model_file(filename) self.model = self.dm.get_model(filename) - self.score = ramalyze(self.model.get_hierarchy()) # get score to see if this works + self.score = ramalyze(self.model.get_hierarchy()) # get score to see if this works self.shape = self.model.get_sites_cart().as_numpy_array().shape - #tests + # tests x = self.mol.coordinates[0] m = self.model.get_sites_cart().as_numpy_array() assert m.shape == x.shape @@ -77,15 +79,17 @@

Source code for molearn.scoring.ramachandran_score

assert not np.any(((m-x[self.idxs])>threshold)) os.remove(tmp_file) -
[docs] def get_score(self, coords, as_ratio = False): +
+[docs] + def get_score(self, coords, as_ratio=False): ''' Given coords (corresponding to self.mol) will calculate Ramachandran scores using cctbux ramalyze module Returns the counts of number of torsion angles that fall within favored, allowed, and outlier regions and finally the total number of torsion angles analysed. :param numpy.ndarray coords: shape (N, 3) :returns: (favored, allowed, outliers, total) :rtype: tuple of ints - ''' + assert coords.shape == self.shape self.model.set_sites_cart(flex.vec3_double(coords[self.idxs].astype(np.double))) self.score = ramalyze(self.model.get_hierarchy()) @@ -96,7 +100,8 @@

Source code for molearn.scoring.ramachandran_score

if as_ratio: return nf/nt, na/nt, no/nt else: - return nf, na, no, nt
+ return nf, na, no, nt
+
@@ -105,17 +110,23 @@

Source code for molearn.scoring.ramachandran_score

make score a global variable This is used when initializing a multiprocessing process ''' + global worker_ramachandran_score - worker_ramachandran_score = score(**kwargs)#mol = mol, data_dir=data_dir, **kwargs) + worker_ramachandran_score = score(**kwargs) # mol = mol, data_dir=data_dir, **kwargs) + def process_ramachandran(coords, kwargs): ''' ramachandran worker Worker function for multiprocessing class ''' - return worker_ramachandran_score.get_score(coords,**kwargs) + + return worker_ramachandran_score.get_score(coords, **kwargs) -
[docs]class Parallel_Ramachandran_Score(): + +
+[docs] +class Parallel_Ramachandran_Score: ''' A multiprocessing class to get Ramachandran scores. A typical use case would looke like:: @@ -143,31 +154,30 @@

Source code for molearn.scoring.ramachandran_score

# set a number of processes as user desires, capped on number of CPUs if processes > 0: - processes = min(processes, cpu_count()) + processes = min(processes, os.cpu_count()) else: - processes = cpu_count() + processes = os.cpu_count() self.mol = deepcopy(mol) score = Ramachandran_Score ctx = get_context('spawn') self.pool = ctx.Pool(processes=processes, initializer=set_global_score, - initargs=(score, dict(mol=mol)), - ) + initargs=(score, dict(mol=mol))) self.process_function = process_ramachandran def __reduce__(self): return (self.__class__, (self.mol,)) - -
[docs] def get_score(self, coords,**kwargs): +
+[docs] + def get_score(self, coords, **kwargs): ''' :param coords: # shape (N, 3) numpy array ''' - #is copy necessary? - return self.pool.apply_async(self.process_function, (coords.copy(), kwargs))
- - + # is copy necessary? + return self.pool.apply_async(self.process_function, (coords.copy(), kwargs))
+
@@ -206,8 +216,8 @@

Navigation

\ No newline at end of file diff --git a/docs/build/_modules/molearn/trainers/openmm_physics_trainer.html b/docs/build/_modules/molearn/trainers/openmm_physics_trainer.html index b81d016..288c404 100644 --- a/docs/build/_modules/molearn/trainers/openmm_physics_trainer.html +++ b/docs/build/_modules/molearn/trainers/openmm_physics_trainer.html @@ -1,18 +1,15 @@ - - + molearn.trainers.openmm_physics_trainer — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -42,7 +39,9 @@

Source code for molearn.trainers.openmm_physics_trainer

from .trainer import Trainer -
[docs]class OpenMM_Physics_Trainer(Trainer): +
+[docs] +class OpenMM_Physics_Trainer(Trainer): ''' OpenMM_Physics_Trainer subclasses Trainer and replaces the valid_step and train_step. An extra 'physics_loss' is calculated using OpenMM and the forces are inserted into backwards pass. @@ -52,7 +51,9 @@

Source code for molearn.trainers.openmm_physics_trainer

def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) -
[docs] def prepare_physics(self, physics_scaling_factor=0.1, clamp_threshold = 1e8, clamp=False, start_physics_at=0, **kwargs): +
+[docs] + def prepare_physics(self, physics_scaling_factor=0.1, clamp_threshold=1e8, clamp=False, start_physics_at=0, **kwargs): ''' Create ``self.physics_loss`` object from :func:`loss_functions.openmm_energy <molearn.loss_functions.openmm_energy>` Needs ``self.mol``, ``self.std``, and ``self._data.atoms`` to have been set with :func:`Trainer.set_data<molearn.trainer.Trainer.set_data>` @@ -67,13 +68,15 @@

Source code for molearn.trainers.openmm_physics_trainer

self.start_physics_at = start_physics_at self.psf = physics_scaling_factor if clamp: - clamp_kwargs = dict(max=clamp_threshold, min = -clamp_threshold) + clamp_kwargs = dict(max=clamp_threshold, min=-clamp_threshold) else: clamp_kwargs = None - self.physics_loss = openmm_energy(self.mol, self.std, clamp=clamp_kwargs, platform = 'CUDA' if self.device == torch.device('cuda') else 'Reference', atoms = self._data.atoms, **kwargs)
+ self.physics_loss = openmm_energy(self.mol, self.std, clamp=clamp_kwargs, platform='CUDA' if self.device == torch.device('cuda') else 'Reference', atoms=self._data.atoms, **kwargs)
-
[docs] def common_physics_step(self, batch, latent): +
+[docs] + def common_physics_step(self, batch, latent): ''' Called from both :func:`train_step <molearn.trainers.OpenMM_Physics_Trainer.train_step>` and :func:`valid_step <molearn.trainers.OpenMM_Physics_Trainer.valid_step>`. Takes random interpolations between adjacent samples latent vectors. These are decoded (decoded structures saved as ``self._internal['generated'] = generated if needed elsewhere) and the energy terms calculated with ``self.physics_loss``. @@ -84,16 +87,19 @@

Source code for molearn.trainers.openmm_physics_trainer

alpha = torch.rand(int(len(batch)//2), 1, 1).type_as(latent) latent_interpolated = (1-alpha)*latent[:-1:2] + alpha*latent[1::2] - generated = self.autoencoder.decode(latent_interpolated)[:,:,:batch.size(2)] + generated = self.autoencoder.decode(latent_interpolated)[:, :, :batch.size(2)] self._internal['generated'] = generated energy = self.physics_loss(generated) - energy[energy.isinf()]=1e35 + energy[energy.isinf()] = 1e35 energy = torch.clamp(energy, max=1e34) energy = energy.nanmean() - return {'physics_loss':energy}#a if not energy.isinf() else torch.tensor(0.0)}
+ return {'physics_loss':energy} # a if not energy.isinf() else torch.tensor(0.0)}
+ -
[docs] def train_step(self, batch): +
+[docs] + def train_step(self, batch): ''' This method overrides :func:`Trainer.train_step <molearn.trainers.Trainer.train_step>` and adds an additional 'Physics_loss' term. @@ -115,7 +121,10 @@

Source code for molearn.trainers.openmm_physics_trainer

results['loss'] = final_loss return results
-
[docs] def valid_step(self, batch): + +
+[docs] + def valid_step(self, batch): ''' This method overrides :func:`Trainer.valid_step <molearn.trainers.Trainer.valid_step>` and adds an additional 'Physics_loss' term. @@ -131,10 +140,13 @@

Source code for molearn.trainers.openmm_physics_trainer

results = self.common_step(batch) results.update(self.common_physics_step(batch, self._internal['encoded'])) - #scale = (self.psf*results['mse_loss'])/(results['physics_loss'] +1e-5) + # scale = (self.psf*results['mse_loss'])/(results['physics_loss'] +1e-5) final_loss = torch.log(results['mse_loss'])+self.psf*torch.log(results['physics_loss']) results['loss'] = final_loss - return results
+ return results
+
+ + if __name__=='__main__': pass @@ -175,8 +187,8 @@

Navigation

\ No newline at end of file diff --git a/docs/build/_modules/molearn/trainers/torch_physics_trainer.html b/docs/build/_modules/molearn/trainers/torch_physics_trainer.html index 7082761..6a30ff5 100644 --- a/docs/build/_modules/molearn/trainers/torch_physics_trainer.html +++ b/docs/build/_modules/molearn/trainers/torch_physics_trainer.html @@ -1,18 +1,15 @@ - - + molearn.trainers.torch_physics_trainer — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -41,7 +38,10 @@

Source code for molearn.trainers.torch_physics_trainer

from molearn.loss_functions import TorchProteinEnergy from .trainer import Trainer -
[docs]class Torch_Physics_Trainer(Trainer): + +
+[docs] +class Torch_Physics_Trainer(Trainer): ''' Torch_Physics_Trainer subclasses Trainer and replaces the valid_step and train_step. An extra 'physics_loss' (bonds, angles, and torsions) is calculated using pytorch. @@ -50,16 +50,21 @@

Source code for molearn.trainers.torch_physics_trainer

def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) -
[docs] def prepare_physics(self, physics_scaling_factor=0.1): +
+[docs] + def prepare_physics(self, physics_scaling_factor=0.1): ''' Create ``self.physics_loss`` object from :func:`loss_functions.TorchProteinEnergy <molearn.loss_functions.TorchProteinEnergy>` Needs ``self.std``, ``self._data`` to have been set with :func:`Trainer.set_data <molearn.trainer.Trainer.set_data>` :param float physics_scaling_factor: (default: 0.1) scaling factor saved to ``self.psf`` that is used in :func: `train_step <molearn.trainers.Torch_Physics_Trainer.train_step>` It will control the relative importance of mse_loss and physics_loss in training. ''' self.psf = physics_scaling_factor - self.physics_loss = TorchProteinEnergy(self._data.dataset[0]*self.std, pdb_atom_names = self._data.get_atominfo(), device = self.device, method = 'roll')
+ self.physics_loss = TorchProteinEnergy(self._data.dataset[0]*self.std, pdb_atom_names=self._data.get_atominfo(), device=self.device, method='roll')
-
[docs] def common_physics_step(self, batch, latent): + +
+[docs] + def common_physics_step(self, batch, latent): ''' Called from both :func:`train_step <molearn.trainers.Torch_Physics_Trainer.train_step>` and :func:`valid_step <molearn.trainers.Torch_Physics_Trainer.valid_step>`. Takes random interpolations between adjacent samples latent vectors. These are decoded (decoded structures saved as ``self._internal['generated'] = generated if needed elsewhere) and the energy terms calculated with ``self.physics_loss``. @@ -69,8 +74,8 @@

Source code for molearn.trainers.torch_physics_trainer

''' alpha = torch.rand(int(len(batch)//2), 1, 1).type_as(latent) latent_interpolated = (1-alpha)*latent[:-1:2] + alpha*latent[1::2] - generated = self.autoencoder.decode(latent_interpolated)[:,:,:batch.size(2)] - bond, angle, torsion = self.physics_loss._roll_bond_angle_torsion_loss(generated*self.std) + generated = self.autoencoder.decode(latent_interpolated)[:, :, :batch.size(2)] + bond, angle, torsion = self.physics_loss._roll_bond_angle_torsion_loss(generated*self.std) n = len(generated) bond/=n angle/=n @@ -78,12 +83,14 @@

Source code for molearn.trainers.torch_physics_trainer

_all = torch.tensor([bond, angle, torsion]) _all[_all.isinf()]=1e35 total_physics = _all.nansum() - #total_physics = torch.nansum(torch.tensor([bond ,angle ,torsion])) + # total_physics = torch.nansum(torch.tensor([bond ,angle ,torsion])) return {'physics_loss':total_physics, 'bond_energy':bond, 'angle_energy':angle, 'torsion_energy':torsion}
-
[docs] def train_step(self, batch): +
+[docs] + def train_step(self, batch): ''' This method overrides :func:`Trainer.train_step <molearn.trainers.Trainer.train_step>` and adds an additional 'Physics_loss' term. @@ -104,7 +111,10 @@

Source code for molearn.trainers.torch_physics_trainer

results['loss'] = final_loss return results
-
[docs] def valid_step(self, batch): + +
+[docs] + def valid_step(self, batch): ''' This method overrides :func:`Trainer.valid_step <molearn.trainers.Trainer.valid_step>` and adds an additional 'Physics_loss' term. @@ -119,10 +129,12 @@

Source code for molearn.trainers.torch_physics_trainer

''' results = self.common_step(batch) results.update(self.common_physics_step(batch, self._internal['encoded'])) - #scale = self.psf*results['mse_loss']/(results['physics_loss']+1e-5) + # scale = self.psf*results['mse_loss']/(results['physics_loss']+1e-5) final_loss = torch.log(results['mse_loss'])+self.psf*torch.log(results['physics_loss']) results['loss'] = final_loss - return results
+ return results
+
+ if __name__=='__main__': @@ -164,8 +176,8 @@

Navigation

\ No newline at end of file diff --git a/docs/build/_modules/molearn/trainers/trainer.html b/docs/build/_modules/molearn/trainers/trainer.html index 062b4bf..54ac44a 100644 --- a/docs/build/_modules/molearn/trainers/trainer.html +++ b/docs/build/_modules/molearn/trainers/trainer.html @@ -1,18 +1,15 @@ - - + molearn.trainers.trainer — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -44,14 +41,16 @@

Source code for molearn.trainers.trainer

 import time
 import torch
 from molearn.data import PDBData
-import warnings
-from decimal import Decimal
 import json
 
+
 class TrainingFailure(Exception):
     pass
 
-
[docs]class Trainer(): + +
+[docs] +class Trainer: ''' Trainer class that defines a number of useful methods for training an autoencoder. @@ -70,9 +69,7 @@

Source code for molearn.trainers.trainer

 
     '''
     
-
-
-    def __init__(self, device = None, log_filename = 'log_file.dat'):
+    def __init__(self, device=None, log_filename='log_file.dat'):
         '''
         :param torch.Device device: if not given will be determinined automatically based on torch.cuda.is_available()
         :param str log_filename: (default: 'default_log_filename.json') file used to log outputs to
@@ -90,7 +87,9 @@ 

Source code for molearn.trainers.trainer

         self.log_filename = 'default_log_filename.json'
         self.scheduler_key = None
 
-
[docs] def get_network_summary(self,): +
+[docs] + def get_network_summary(self): ''' returns a dictionary containing information about the size of the autoencoder. ''' @@ -98,15 +97,17 @@

Source code for molearn.trainers.trainer

             return sum(p.numel() for p in model.parameters() if (p.requires_grad and trainable_only))
 
         return dict(
-            encoder_trainable = get_parameters(True, self.autoencoder.encoder),
-            encoder_total = get_parameters(False, self.autoencoder.encoder),
-            decoder_trainable = get_parameters(True, self.autoencoder.decoder),
-            decoder_total = get_parameters(False, self.autoencoder.decoder),
-            autoencoder_trainable = get_parameters(True, self.autoencoder),
-            autoencoder_total = get_parameters(False, self.autoencoder),
-                      )
- -
[docs] def set_autoencoder(self, autoencoder, **kwargs): + encoder_trainable=get_parameters(True, self.autoencoder.encoder), + encoder_total=get_parameters(False, self.autoencoder.encoder), + decoder_trainable=get_parameters(True, self.autoencoder.decoder), + decoder_total=get_parameters(False, self.autoencoder.decoder), + autoencoder_trainable=get_parameters(True, self.autoencoder), + autoencoder_total=get_parameters(False, self.autoencoder))
+ + +
+[docs] + def set_autoencoder(self, autoencoder, **kwargs): ''' :param autoencoder: (:func:`autoencoder <molearn.models>`,) torch network class that implements ``autoencoder.encode``, and ``autoencoder.decode``. Please pass the class not the instance :param \*\*kwargs: any other kwargs given to this method will be used to initialise the network ``self.autoencoder = autoencoder(**kwargs)`` @@ -117,7 +118,10 @@

Source code for molearn.trainers.trainer

             self.autoencoder = autoencoder.to(self.device)
         self._autoencoder_kwargs = kwargs
-
[docs] def set_dataloader(self, train_dataloader=None, valid_dataloader=None): + +
+[docs] + def set_dataloader(self, train_dataloader=None, valid_dataloader=None): ''' :param torch.DataLoader train_dataloader: Alternatively set using ``trainer.train_dataloader = dataloader`` :param torch.DataLoader valid_dataloader: Alternatively set using ``trainer.valid_dataloader = dataloader`` @@ -127,7 +131,10 @@

Source code for molearn.trainers.trainer

         if valid_dataloader is not None:
             self.valid_dataloader = valid_dataloader
-
[docs] def set_data(self, data, **kwargs): + +
+[docs] + def set_data(self, data, **kwargs): ''' Sets up internal variables and gives trainer access to dataloaders. ``self.train_dataloader``, ``self.valid_dataloader``, ``self.std``, ``self.mean``, ``self.mol`` will all be obtained from this object. @@ -146,7 +153,9 @@

Source code for molearn.trainers.trainer

         self._data = data
-
[docs] def prepare_optimiser(self, lr = 1e-3, weight_decay = 0.0001, **optimiser_kwargs): +
+[docs] + def prepare_optimiser(self, lr=1e-3, weight_decay=0.0001, **optimiser_kwargs): ''' The Default optimiser is ``AdamW`` and is saved in ``self.optimiser``. With no optional arguments this function is the same as doing: @@ -156,9 +165,12 @@

Source code for molearn.trainers.trainer

         :param float weight_decay: (default: 0.0001) optimiser weight_decay
         :param \*\*optimiser_kwargs: other kwargs that are passed onto AdamW
         '''
-        self.optimiser = torch.optim.AdamW(self.autoencoder.parameters(), lr=lr, weight_decay = weight_decay, **optimiser_kwargs)
+ self.optimiser = torch.optim.AdamW(self.autoencoder.parameters(), lr=lr, weight_decay=weight_decay, **optimiser_kwargs)
-
[docs] def log(self, log_dict, verbose=None): + +
+[docs] + def log(self, log_dict, verbose=None): ''' Then contents of log_dict are dumped using ``json.dumps(log_dict)`` and printed and/or appended to ``self.log_filename`` This function is called from :func:`self.run <molearn.trainers.Trainer.run>` @@ -173,7 +185,10 @@

Source code for molearn.trainers.trainer

         with open(self.log_filename, 'a') as f:
             f.write(dump+'\n')
-
[docs] def scheduler_step(self, logs): + +
+[docs] + def scheduler_step(self, logs): ''' This function does nothing. It is called after :func:`self.valid_epoch <molearn.trainers.Trainer.valid_epoch>` in :func:`Trainer.run() <molearn.trainers.Trainer.run>` and before :func:`checkpointing <molearn.trainers.Trainer.checkpoint>`. It is designed to be overridden if you wish to use a scheduler. @@ -181,7 +196,10 @@

Source code for molearn.trainers.trainer

         '''
         pass
-
[docs] def run(self, max_epochs=100, log_filename = None, log_folder=None, checkpoint_frequency=1, checkpoint_folder='checkpoint_folder', allow_n_failures=10, verbose=None): + +
+[docs] + def run(self, max_epochs=100, log_filename=None, log_folder=None, checkpoint_frequency=1, checkpoint_folder='checkpoint_folder', allow_n_failures=10, verbose=None): ''' Calls the following in a loop: @@ -222,20 +240,20 @@

Source code for molearn.trainers.trainer

                     self.scheduler_step(logs)
                     if self.best is None or self.best > logs['valid_loss']:
                         self.checkpoint(epoch, logs, checkpoint_folder)
-                    elif epoch%checkpoint_frequency==0:
+                    elif epoch % checkpoint_frequency == 0:
                         self.checkpoint(epoch, logs, checkpoint_folder)
                     time4 = time.time()
-                    logs.update(epoch = epoch,
+                    logs.update(epoch=epoch,
                             train_seconds=time2-time1,
                             valid_seconds=time3-time2,
-                            checkpoint_seconds= time4-time3,
+                            checkpoint_seconds=time4-time3,
                             total_seconds=time4-time1)
                     self.log(logs)
                     if np.isnan(logs['valid_loss']) or np.isnan(logs['train_loss']):
                         raise TrainingFailure('nan received, failing')
                     self.epoch+= 1
             except TrainingFailure:
-                if attempt==(allow_n_failures-1):
+                if attempt == (allow_n_failures-1):
                     failure_message = f'Training Failure due to Nan in attempt {attempt}, end now/n'
                     self.log({'Failure':failure_message})
                     raise TrainingFailure('nan received, failing')
@@ -247,7 +265,9 @@ 

Source code for molearn.trainers.trainer

                 break
-
[docs] def train_epoch(self,epoch): +
+[docs] + def train_epoch(self,epoch): ''' Train one epoch. Called once an epoch from :func:`trainer.run <molearn.trainers.Trainer.run>` This method performs the following functions: @@ -282,7 +302,10 @@

Source code for molearn.trainers.trainer

             N+=len(batch)
         return {f'train_{key}': results[key]/N for key in results.keys()}
-
[docs] def train_step(self, batch): + +
+[docs] + def train_step(self, batch): ''' Called from :func:`Trainer.train_epoch <molearn.trainers.Trainer.train_epoch>`. @@ -294,7 +317,10 @@

Source code for molearn.trainers.trainer

         results['loss'] = results['mse_loss']
         return results
-
[docs] def common_step(self, batch): + +
+[docs] + def common_step(self, batch): ''' Called from both train_step and valid_step. Calculates the mean squared error loss for self.autoencoder. @@ -309,10 +335,12 @@

Source code for molearn.trainers.trainer

         self._internal['encoded'] = encoded
         decoded = self.autoencoder.decode(encoded)[:,:,:batch.size(2)]
         self._internal['decoded'] = decoded
-        return dict(mse_loss = ((batch-decoded)**2).mean())
+ return dict(mse_loss=((batch-decoded)**2).mean())
-
[docs] def valid_epoch(self,epoch): +
+[docs] + def valid_epoch(self, epoch): ''' Called once an epoch from :func:`trainer.run <molearn.trainers.Trainer.run>` within a no_grad context. This method performs the following functions: @@ -340,7 +368,10 @@

Source code for molearn.trainers.trainer

             N+=len(batch)
         return {f'valid_{key}': results[key]/N for key in results.keys()}
-
[docs] def valid_step(self, batch): + +
+[docs] + def valid_step(self, batch): ''' Called from :func:`Trainer.valid_epoch<molearn.trainer.Trainer.valid_epoch>` on every mini-batch. @@ -352,7 +383,10 @@

Source code for molearn.trainers.trainer

         results['loss'] = results['mse_loss']
         return results
-
[docs] def learning_rate_sweep(self, max_lr=100, min_lr=1e-5, number_of_iterations=1000, checkpoint_folder='checkpoint_sweep',train_on='mse_loss', save=['loss', 'mse_loss']): + +
+[docs] + def learning_rate_sweep(self, max_lr=100, min_lr=1e-5, number_of_iterations=1000, checkpoint_folder='checkpoint_sweep', train_on='mse_loss', save=['loss', 'mse_loss']): ''' Deprecated method. Performs a sweep of learning rate between ``max_lr`` and ``min_lr`` over ``number_of_iterations``. @@ -367,10 +401,12 @@

Source code for molearn.trainers.trainer

         :rtype: numpy.ndarray
         '''
         self.autoencoder.train()
+        
         def cycle(iterable):
             while True:
                 for i in iterable:
                     yield i
+                    
         init_loss = 0.0
         values = []
         data = iter(cycle(self.train_dataloader))
@@ -381,20 +417,22 @@ 

Source code for molearn.trainers.trainer

 
             self.optimiser.zero_grad()
             result = self.train_step(batch)
-            #result['loss']/=len(batch)
+            # result['loss']/=len(batch)
             result[train_on].backward()
             self.optimiser.step()
             values.append((lr,)+tuple((result[name].item() for name in save)))
-            #print(i,lr, result['loss'].item())
             if i==0:
                 init_loss = result[train_on].item()
-            #if result[train_on].item()>1e6*init_loss:
+            # if result[train_on].item()>1e6*init_loss:
             #    break
         values = np.array(values)
         print('min value ', values[np.nanargmin(values[:,1])])
         return values
-
[docs] def update_optimiser_hyperparameters(self, **kwargs): + +
+[docs] + def update_optimiser_hyperparameters(self, **kwargs): ''' Update optimiser hyperparameter e.g. ``trainer.update_optimiser_hyperparameters(lr = 1e3)`` @@ -404,7 +442,10 @@

Source code for molearn.trainers.trainer

             for key, value in kwargs.items():
                 g[key] = value
-
[docs] def checkpoint(self, epoch, valid_logs, checkpoint_folder, loss_key='valid_loss'): + +
+[docs] + def checkpoint(self, epoch, valid_logs, checkpoint_folder, loss_key='valid_loss'): ''' Checkpoint the current network. The checkpoint will be saved as ``'last.ckpt'``. If valid_logs[loss_key] is better than self.best then this checkpoint will replace self.best and ``'last.ckpt'`` will be renamed to ``f'{checkpoint_folder}/checkpoint_epoch{epoch}_loss{valid_loss}.ckpt'`` and the former best (filename saved as ``self.best_name``) will be deleted @@ -436,7 +477,10 @@

Source code for molearn.trainers.trainer

             self.best_epoch = epoch
             self.best = valid_loss
-
[docs] def load_checkpoint(self, checkpoint_name ='best', checkpoint_folder = '', load_optimiser=True): + +
+[docs] + def load_checkpoint(self, checkpoint_name='best', checkpoint_folder='', load_optimiser=True): ''' Load checkpoint. @@ -456,7 +500,7 @@

Source code for molearn.trainers.trainer

             _name = f'{checkpoint_folder}/last.ckpt'
         else:
             _name = f'{checkpoint_folder}/{checkpoint_name}'
-        checkpoint = torch.load(_name, map_location = self.device)
+        checkpoint = torch.load(_name, map_location=self.device)
         if not hasattr(self, 'autoencoder'):
             raise NotImplementedError('self.autoencoder does not exist, I have no way of knowing what network you want to load checkoint weights into yet, please set the network first')
 
@@ -466,7 +510,10 @@ 

Source code for molearn.trainers.trainer

                 raise NotImplementedError('self.optimiser does not exist, I have no way of knowing what optimiser you previously used, please set it first.')
             self.optimiser.load_state_dict(checkpoint['optimizer_state_dict'])
         epoch = checkpoint['epoch']
-        self.epoch = epoch+1
+ self.epoch = epoch+1
+
+ + if __name__=='__main__': pass @@ -507,8 +554,8 @@

Navigation

\ No newline at end of file diff --git a/docs/build/_sources/faq.rst.txt b/docs/build/_sources/faq.rst.txt index 8ee24b0..d302b0a 100644 --- a/docs/build/_sources/faq.rst.txt +++ b/docs/build/_sources/faq.rst.txt @@ -3,41 +3,32 @@ Frequently Asked Questions ########################## -I get an IndexError when I try loading a multiPDB -------------------------------------------------- - -This is likely an error thrown by MDAnalysis. Typically this happens when -attempting to load a multiPDB file saved with software like VMD, which uses a -different syntax to indicate the end of a conformer in the file. A way to get -around this, is to re-save the file in a format MDAnalysis can parse, e.g., by -loading and re-saving the file via biobox. - -.. code-block:: - - import biobox as bb - M = bb.Molecule(filename) - M.write_pdb(newfilename) - - I cannot install openmmtorchplugin ---------------------------------- openmmtorchplugin depends on conda-forge builds of pyTorch and OpenMM. Due to this dependency, Windows cannot be supported. + Installation can be carried out via terminal with conda-forge: .. code:: conda install -c conda-forge openmmtorchplugin + The following Python versions are supported: 3.8, 3.9, 3.10, 3.11. -If you are running into any issue, attempt a fresh install in a new conda + +If you run into any issue, either at installation or runtime, ensure you have a +plugin version >=1.1.3, as previous ones have known compatibility issues with OpenMM. +The easiest way to ensure the most up to date version of molearn and the +openmmtorchplugin are installed, is to run a fresh install in a new conda environment: .. code:: conda create --name test_env python=3.10 - conda install -c conda-forge openmmtorchplugin molearn + conda install -n test_env -c conda-forge openmmtorchplugin molearn + openmmtorchplugin is built with cuda_compiler_version=11.2 in conda-forge CI tools. 
This has been successfully tested on Ubuntu machines running with the driver @@ -47,6 +38,23 @@ The Nvidia website tabulates minimum driver versions required and version compat `NVIDIA CUDA Toolkit Minimum driver versions `_ +I get an IndexError when I try loading a multiPDB +------------------------------------------------- + +This is likely an error thrown by MDAnalysis. Typically this happens when +attempting to load a multiPDB file saved with software like VMD, which uses a +different syntax to indicate the end of a conformer in the file. A way to get +around this, is to re-save the file in a format MDAnalysis can parse, e.g., by +loading and re-saving the file via biobox. + +.. code-block:: + + import biobox as bb + M = bb.Molecule(filename) + M.write_pdb(newfilename) + + + The GUI freezes when I use it/does not work as expected ------------------------------------------------------- diff --git a/docs/build/_static/basic.css b/docs/build/_static/basic.css index 0889677..30fee9d 100644 --- a/docs/build/_static/basic.css +++ b/docs/build/_static/basic.css @@ -4,7 +4,7 @@ * * Sphinx stylesheet -- basic theme. * - * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. 
* */ @@ -237,6 +237,10 @@ a.headerlink { visibility: hidden; } +a:visited { + color: #551A8B; +} + h1:hover > a.headerlink, h2:hover > a.headerlink, h3:hover > a.headerlink, @@ -324,17 +328,17 @@ aside.sidebar { p.sidebar-title { font-weight: bold; } + nav.contents, aside.topic, - div.admonition, div.topic, blockquote { clear: left; } /* -- topics ---------------------------------------------------------------- */ + nav.contents, aside.topic, - div.topic { border: 1px solid #ccc; padding: 7px; @@ -375,7 +379,6 @@ div.sidebar > :last-child, aside.sidebar > :last-child, nav.contents > :last-child, aside.topic > :last-child, - div.topic > :last-child, div.admonition > :last-child { margin-bottom: 0; @@ -385,7 +388,6 @@ div.sidebar::after, aside.sidebar::after, nav.contents::after, aside.topic::after, - div.topic::after, div.admonition::after, blockquote::after { @@ -611,25 +613,6 @@ ul.simple p { margin-bottom: 0; } -/* Docutils 0.17 and older (footnotes & citations) */ -dl.footnote > dt, -dl.citation > dt { - float: left; - margin-right: 0.5em; -} - -dl.footnote > dd, -dl.citation > dd { - margin-bottom: 0em; -} - -dl.footnote > dd:after, -dl.citation > dd:after { - content: ""; - clear: both; -} - -/* Docutils 0.18+ (footnotes & citations) */ aside.footnote > span, div.citation > span { float: left; @@ -654,8 +637,6 @@ div.citation > p:last-of-type:after { clear: both; } -/* Footnotes & citations ends */ - dl.field-list { display: grid; grid-template-columns: fit-content(30%) auto; @@ -668,10 +649,6 @@ dl.field-list > dt { padding-right: 5px; } -dl.field-list > dt:after { - content: ":"; -} - dl.field-list > dd { padding-left: 0.5em; margin-top: 0em; @@ -697,6 +674,16 @@ dd { margin-left: 30px; } +.sig dd { + margin-top: 0px; + margin-bottom: 0px; +} + +.sig dl { + margin-top: 0px; + margin-bottom: 0px; +} + dl > dd:last-child, dl > dd:last-child > :last-child { margin-bottom: 0; @@ -765,6 +752,14 @@ abbr, acronym { cursor: help; } +.translated { + background-color: 
rgba(207, 255, 207, 0.2) +} + +.untranslated { + background-color: rgba(255, 207, 207, 0.2) +} + /* -- code displays --------------------------------------------------------- */ pre { diff --git a/docs/build/_static/doctools.js b/docs/build/_static/doctools.js index c3db08d..d06a71d 100644 --- a/docs/build/_static/doctools.js +++ b/docs/build/_static/doctools.js @@ -4,12 +4,19 @@ * * Base JavaScript utilities for all Sphinx HTML documentation. * - * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. * */ "use strict"; +const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([ + "TEXTAREA", + "INPUT", + "SELECT", + "BUTTON", +]); + const _ready = (callback) => { if (document.readyState !== "loading") { callback(); @@ -18,73 +25,11 @@ const _ready = (callback) => { } }; -/** - * highlight a given string on a node by wrapping it in - * span elements with the given class name. 
- */ -const _highlight = (node, addItems, text, className) => { - if (node.nodeType === Node.TEXT_NODE) { - const val = node.nodeValue; - const parent = node.parentNode; - const pos = val.toLowerCase().indexOf(text); - if ( - pos >= 0 && - !parent.classList.contains(className) && - !parent.classList.contains("nohighlight") - ) { - let span; - - const closestNode = parent.closest("body, svg, foreignObject"); - const isInSVG = closestNode && closestNode.matches("svg"); - if (isInSVG) { - span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); - } else { - span = document.createElement("span"); - span.classList.add(className); - } - - span.appendChild(document.createTextNode(val.substr(pos, text.length))); - parent.insertBefore( - span, - parent.insertBefore( - document.createTextNode(val.substr(pos + text.length)), - node.nextSibling - ) - ); - node.nodeValue = val.substr(0, pos); - - if (isInSVG) { - const rect = document.createElementNS( - "http://www.w3.org/2000/svg", - "rect" - ); - const bbox = parent.getBBox(); - rect.x.baseVal.value = bbox.x; - rect.y.baseVal.value = bbox.y; - rect.width.baseVal.value = bbox.width; - rect.height.baseVal.value = bbox.height; - rect.setAttribute("class", className); - addItems.push({ parent: parent, target: rect }); - } - } - } else if (node.matches && !node.matches("button, select, textarea")) { - node.childNodes.forEach((el) => _highlight(el, addItems, text, className)); - } -}; -const _highlightText = (thisNode, text, className) => { - let addItems = []; - _highlight(thisNode, addItems, text, className); - addItems.forEach((obj) => - obj.parent.insertAdjacentElement("beforebegin", obj.target) - ); -}; - /** * Small JavaScript module for the documentation. 
*/ const Documentation = { init: () => { - Documentation.highlightSearchWords(); Documentation.initDomainIndexTable(); Documentation.initOnKeyListeners(); }, @@ -126,51 +71,6 @@ const Documentation = { Documentation.LOCALE = catalog.locale; }, - /** - * highlight the search words provided in the url in the text - */ - highlightSearchWords: () => { - const highlight = - new URLSearchParams(window.location.search).get("highlight") || ""; - const terms = highlight.toLowerCase().split(/\s+/).filter(x => x); - if (terms.length === 0) return; // nothing to do - - // There should never be more than one element matching "div.body" - const divBody = document.querySelectorAll("div.body"); - const body = divBody.length ? divBody[0] : document.querySelector("body"); - window.setTimeout(() => { - terms.forEach((term) => _highlightText(body, term, "highlighted")); - }, 10); - - const searchBox = document.getElementById("searchbox"); - if (searchBox === null) return; - searchBox.appendChild( - document - .createRange() - .createContextualFragment( - '" - ) - ); - }, - - /** - * helper function to hide the search marks again - */ - hideSearchWords: () => { - document - .querySelectorAll("#searchbox .highlight-link") - .forEach((el) => el.remove()); - document - .querySelectorAll("span.highlighted") - .forEach((el) => el.classList.remove("highlighted")); - const url = new URL(window.location); - url.searchParams.delete("highlight"); - window.history.replaceState({}, "", url); - }, - /** * helper function to focus on search bar */ @@ -210,15 +110,11 @@ const Documentation = { ) return; - const blacklistedElements = new Set([ - "TEXTAREA", - "INPUT", - "SELECT", - "BUTTON", - ]); document.addEventListener("keydown", (event) => { - if (blacklistedElements.has(document.activeElement.tagName)) return; // bail for input elements - if (event.altKey || event.ctrlKey || event.metaKey) return; // bail with special keys + // bail for input elements + if 
(BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.altKey || event.ctrlKey || event.metaKey) return; if (!event.shiftKey) { switch (event.key) { @@ -240,10 +136,6 @@ const Documentation = { event.preventDefault(); } break; - case "Escape": - if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break; - Documentation.hideSearchWords(); - event.preventDefault(); } } diff --git a/docs/build/_static/documentation_options.js b/docs/build/_static/documentation_options.js index a99dba8..15dd1bd 100644 --- a/docs/build/_static/documentation_options.js +++ b/docs/build/_static/documentation_options.js @@ -1,5 +1,4 @@ -var DOCUMENTATION_OPTIONS = { - URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'), +const DOCUMENTATION_OPTIONS = { VERSION: '2.0.1', LANGUAGE: 'en', COLLAPSE_INDEX: false, @@ -10,5 +9,5 @@ var DOCUMENTATION_OPTIONS = { SOURCELINK_SUFFIX: '.txt', NAVIGATION_WITH_KEYS: false, SHOW_SEARCH_SUMMARY: true, - ENABLE_SEARCH_SHORTCUTS: false, + ENABLE_SEARCH_SHORTCUTS: true, }; \ No newline at end of file diff --git a/docs/build/_static/language_data.js b/docs/build/_static/language_data.js index 2e22b06..250f566 100644 --- a/docs/build/_static/language_data.js +++ b/docs/build/_static/language_data.js @@ -5,7 +5,7 @@ * This script contains the language-specific data used by searchtools.js, * namely the list of stopwords, stemmer, scorer and splitter. * - * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. 
* */ diff --git a/docs/build/_static/pygments.css b/docs/build/_static/pygments.css index 691aeb8..0d49244 100644 --- a/docs/build/_static/pygments.css +++ b/docs/build/_static/pygments.css @@ -17,6 +17,7 @@ span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: .highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */ .highlight .gd { color: #A00000 } /* Generic.Deleted */ .highlight .ge { font-style: italic } /* Generic.Emph */ +.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ .highlight .gr { color: #FF0000 } /* Generic.Error */ .highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ .highlight .gi { color: #00A000 } /* Generic.Inserted */ diff --git a/docs/build/_static/searchtools.js b/docs/build/_static/searchtools.js index ac4d586..7918c3f 100644 --- a/docs/build/_static/searchtools.js +++ b/docs/build/_static/searchtools.js @@ -4,7 +4,7 @@ * * Sphinx JavaScript utilities for the full-text search. * - * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. * :license: BSD, see LICENSE for details. 
* */ @@ -57,14 +57,14 @@ const _removeChildren = (element) => { const _escapeRegExp = (string) => string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string -const _displayItem = (item, highlightTerms, searchTerms) => { +const _displayItem = (item, searchTerms, highlightTerms) => { const docBuilder = DOCUMENTATION_OPTIONS.BUILDER; - const docUrlRoot = DOCUMENTATION_OPTIONS.URL_ROOT; const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX; const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX; const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY; + const contentRoot = document.documentElement.dataset.content_root; - const [docName, title, anchor, descr] = item; + const [docName, title, anchor, descr, score, _filename] = item; let listItem = document.createElement("li"); let requestUrl; @@ -75,29 +75,35 @@ const _displayItem = (item, highlightTerms, searchTerms) => { if (dirname.match(/\/index\/$/)) dirname = dirname.substring(0, dirname.length - 6); else if (dirname === "index/") dirname = ""; - requestUrl = docUrlRoot + dirname; + requestUrl = contentRoot + dirname; linkUrl = requestUrl; } else { // normal html builders - requestUrl = docUrlRoot + docName + docFileSuffix; + requestUrl = contentRoot + docName + docFileSuffix; linkUrl = docName + docLinkSuffix; } - const params = new URLSearchParams(); - params.set("highlight", [...highlightTerms].join(" ")); let linkEl = listItem.appendChild(document.createElement("a")); - linkEl.href = linkUrl + "?" 
+ params.toString() + anchor; + linkEl.href = linkUrl + anchor; + linkEl.dataset.score = score; linkEl.innerHTML = title; - if (descr) - listItem.appendChild(document.createElement("span")).innerText = + if (descr) { + listItem.appendChild(document.createElement("span")).innerHTML = " (" + descr + ")"; + // highlight search terms in the description + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + } else if (showSearchSummary) fetch(requestUrl) .then((responseData) => responseData.text()) .then((data) => { if (data) listItem.appendChild( - Search.makeSearchSummary(data, searchTerms, highlightTerms) + Search.makeSearchSummary(data, searchTerms) ); + // highlight search terms in the summary + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); }); Search.output.appendChild(listItem); }; @@ -116,15 +122,15 @@ const _finishSearch = (resultCount) => { const _displayNextItem = ( results, resultCount, + searchTerms, highlightTerms, - searchTerms ) => { // results left, load the summary and display it // this is intended to be dynamic (don't sub resultsCount) if (results.length) { - _displayItem(results.pop(), highlightTerms, searchTerms); + _displayItem(results.pop(), searchTerms, highlightTerms); setTimeout( - () => _displayNextItem(results, resultCount, highlightTerms, searchTerms), + () => _displayNextItem(results, resultCount, searchTerms, highlightTerms), 5 ); } @@ -155,10 +161,8 @@ const Search = { _pulse_status: -1, htmlToText: (htmlString) => { - const htmlElement = document - .createRange() - .createContextualFragment(htmlString); - _removeChildren(htmlElement.querySelectorAll(".headerlink")); + const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html'); + htmlElement.querySelectorAll(".headerlink").forEach((el) => { el.remove() }); const docContent = 
htmlElement.querySelector('[role="main"]'); if (docContent !== undefined) return docContent.textContent; console.warn( @@ -239,6 +243,12 @@ const Search = { * execute search (requires search index to be loaded) */ query: (query) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + const allTitles = Search._index.alltitles; + const indexEntries = Search._index.indexentries; + // stem the search terms and add them to the correct list const stemmer = new Stemmer(); const searchTerms = new Set(); @@ -266,6 +276,10 @@ const Search = { } }); + if (SPHINX_HIGHLIGHT_ENABLED) { // set in sphinx_highlight.js + localStorage.setItem("sphinx_highlight_terms", [...highlightTerms].join(" ")) + } + // console.debug("SEARCH: searching for:"); // console.info("required: ", [...searchTerms]); // console.info("excluded: ", [...excludedTerms]); @@ -274,6 +288,40 @@ const Search = { let results = []; _removeChildren(document.getElementById("search-progress")); + const queryLower = query.toLowerCase(); + for (const [title, foundTitles] of Object.entries(allTitles)) { + if (title.toLowerCase().includes(queryLower) && (queryLower.length >= title.length/2)) { + for (const [file, id] of foundTitles) { + let score = Math.round(100 * queryLower.length / title.length) + results.push([ + docNames[file], + titles[file] !== title ? `${titles[file]} > ${title}` : title, + id !== null ? "#" + id : "", + null, + score, + filenames[file], + ]); + } + } + } + + // search for explicit entries in index directives + for (const [entry, foundEntries] of Object.entries(indexEntries)) { + if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) { + for (const [file, id] of foundEntries) { + let score = Math.round(100 * queryLower.length / entry.length) + results.push([ + docNames[file], + titles[file], + id ? 
"#" + id : "", + null, + score, + filenames[file], + ]); + } + } + } + // lookup as object objectTerms.forEach((term) => results.push(...Search.performObjectSearch(term, objectTerms)) @@ -320,7 +368,7 @@ const Search = { // console.info("search results:", Search.lastresults); // print the results - _displayNextItem(results, results.length, highlightTerms, searchTerms); + _displayNextItem(results, results.length, searchTerms, highlightTerms); }, /** @@ -401,8 +449,8 @@ const Search = { // prepare search const terms = Search._index.terms; const titleTerms = Search._index.titleterms; - const docNames = Search._index.docnames; const filenames = Search._index.filenames; + const docNames = Search._index.docnames; const titles = Search._index.titles; const scoreMap = new Map(); @@ -499,16 +547,15 @@ const Search = { /** * helper function to return a node containing the * search summary for a given text. keywords is a list - * of stemmed words, highlightWords is the list of normal, unstemmed - * words. the first one is used to find the occurrence, the - * latter for highlighting it. + * of stemmed words. */ - makeSearchSummary: (htmlText, keywords, highlightWords) => { - const text = Search.htmlToText(htmlText).toLowerCase(); + makeSearchSummary: (htmlText, keywords) => { + const text = Search.htmlToText(htmlText); if (text === "") return null; + const textLower = text.toLowerCase(); const actualStartPosition = [...keywords] - .map((k) => text.indexOf(k.toLowerCase())) + .map((k) => textLower.indexOf(k.toLowerCase())) .filter((i) => i > -1) .slice(-1)[0]; const startWithContext = Math.max(actualStartPosition - 120, 0); @@ -516,13 +563,9 @@ const Search = { const top = startWithContext === 0 ? "" : "..."; const tail = startWithContext + 240 < text.length ? "..." 
: ""; - let summary = document.createElement("div"); + let summary = document.createElement("p"); summary.classList.add("context"); - summary.innerText = top + text.substr(startWithContext, 240).trim() + tail; - - highlightWords.forEach((highlightWord) => - _highlightText(summary, highlightWord, "highlighted") - ); + summary.textContent = top + text.substr(startWithContext, 240).trim() + tail; return summary; }, diff --git a/docs/build/_static/sphinx_highlight.js b/docs/build/_static/sphinx_highlight.js new file mode 100644 index 0000000..8a96c69 --- /dev/null +++ b/docs/build/_static/sphinx_highlight.js @@ -0,0 +1,154 @@ +/* Highlighting utilities for Sphinx HTML documentation. */ +"use strict"; + +const SPHINX_HIGHLIGHT_ENABLED = true + +/** + * highlight a given string on a node by wrapping it in + * span elements with the given class name. + */ +const _highlight = (node, addItems, text, className) => { + if (node.nodeType === Node.TEXT_NODE) { + const val = node.nodeValue; + const parent = node.parentNode; + const pos = val.toLowerCase().indexOf(text); + if ( + pos >= 0 && + !parent.classList.contains(className) && + !parent.classList.contains("nohighlight") + ) { + let span; + + const closestNode = parent.closest("body, svg, foreignObject"); + const isInSVG = closestNode && closestNode.matches("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.classList.add(className); + } + + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + const rest = document.createTextNode(val.substr(pos + text.length)); + parent.insertBefore( + span, + parent.insertBefore( + rest, + node.nextSibling + ) + ); + node.nodeValue = val.substr(0, pos); + /* There may be more occurrences of search term in this node. So call this + * function recursively on the remaining fragment. 
+ */ + _highlight(rest, addItems, text, className); + + if (isInSVG) { + const rect = document.createElementNS( + "http://www.w3.org/2000/svg", + "rect" + ); + const bbox = parent.getBBox(); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute("class", className); + addItems.push({ parent: parent, target: rect }); + } + } + } else if (node.matches && !node.matches("button, select, textarea")) { + node.childNodes.forEach((el) => _highlight(el, addItems, text, className)); + } +}; +const _highlightText = (thisNode, text, className) => { + let addItems = []; + _highlight(thisNode, addItems, text, className); + addItems.forEach((obj) => + obj.parent.insertAdjacentElement("beforebegin", obj.target) + ); +}; + +/** + * Small JavaScript module for the documentation. + */ +const SphinxHighlight = { + + /** + * highlight the search words provided in localstorage in the text + */ + highlightSearchWords: () => { + if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight + + // get and clear terms from localstorage + const url = new URL(window.location); + const highlight = + localStorage.getItem("sphinx_highlight_terms") + || url.searchParams.get("highlight") + || ""; + localStorage.removeItem("sphinx_highlight_terms") + url.searchParams.delete("highlight"); + window.history.replaceState({}, "", url); + + // get individual terms from highlight string + const terms = highlight.toLowerCase().split(/\s+/).filter(x => x); + if (terms.length === 0) return; // nothing to do + + // There should never be more than one element matching "div.body" + const divBody = document.querySelectorAll("div.body"); + const body = divBody.length ? 
divBody[0] : document.querySelector("body"); + window.setTimeout(() => { + terms.forEach((term) => _highlightText(body, term, "highlighted")); + }, 10); + + const searchBox = document.getElementById("searchbox"); + if (searchBox === null) return; + searchBox.appendChild( + document + .createRange() + .createContextualFragment( + '" + ) + ); + }, + + /** + * helper function to hide the search marks again + */ + hideSearchWords: () => { + document + .querySelectorAll("#searchbox .highlight-link") + .forEach((el) => el.remove()); + document + .querySelectorAll("span.highlighted") + .forEach((el) => el.classList.remove("highlighted")); + localStorage.removeItem("sphinx_highlight_terms") + }, + + initEscapeListener: () => { + // only install a listener if it is really needed + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; + if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { + SphinxHighlight.hideSearchWords(); + event.preventDefault(); + } + }); + }, +}; + +_ready(() => { + /* Do not call highlightSearchWords() when we are on the search page. + * It will highlight words from the *previous* search query. + */ + if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords(); + SphinxHighlight.initEscapeListener(); +}); diff --git a/docs/build/_static/sphinxdoc.css b/docs/build/_static/sphinxdoc.css index f922226..1e9ffe0 100644 --- a/docs/build/_static/sphinxdoc.css +++ b/docs/build/_static/sphinxdoc.css @@ -5,7 +5,7 @@ * Sphinx stylesheet -- sphinxdoc theme. Originally created by * Armin Ronacher for Werkzeug. * - * :copyright: Copyright 2007-2022 by the Sphinx team, see AUTHORS. + * :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS. 
* :license: BSD, see LICENSE for details. * */ @@ -151,6 +151,10 @@ a:hover { color: #2491CF; } +a:visited { + color: #551A8B; +} + div.body a { text-decoration: underline; } @@ -265,9 +269,9 @@ div.quotebar { padding: 2px 7px; border: 1px solid #ccc; } + nav.contents, aside.topic, - div.topic { background-color: #f8f8f8; } diff --git a/docs/build/analysis.html b/docs/build/analysis.html index d0a145d..5e6941c 100644 --- a/docs/build/analysis.html +++ b/docs/build/analysis.html @@ -1,19 +1,16 @@ - - + - + Analysis — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -45,15 +42,15 @@

Navigation

-

Analysis

+

Analysis

-class MolearnAnalysis[source]
+class MolearnAnalysis[source]

This class provides methods dedicated to the quality analysis of a trained model.

-generate(crd)[source]
+generate(crd)[source]

Generate a collection of protein conformations, given coordinates in the latent space.

Parameters:
@@ -67,7 +64,7 @@

Analysis
-get_all_dope_score(tensor, refine=True)[source]
+get_all_dope_score(tensor, refine=True)[source]

Calculate DOPE score of an ensemble of atom coordinates.

Parameters:
@@ -81,7 +78,7 @@

Analysis
-get_all_ramachandran_score(tensor)[source]
+get_all_ramachandran_score(tensor)[source]

Calculate Ramachandran score of an ensemble of atomic coordinates.

Parameters:
@@ -92,7 +89,7 @@

Analysis
-get_dataset(key)[source]
+get_dataset(key)[source]
Parameters:

key (str) – key pointing to a dataset previously loaded with set_dataset

@@ -102,7 +99,7 @@

Analysis
-get_decoded(key)[source]
+get_decoded(key)[source]
Parameters:

key (str) – key pointing to a dataset previously loaded with set_dataset

@@ -112,7 +109,7 @@

Analysis
-get_dope(key, refine=True, **kwargs)[source]
+get_dope(key, refine=True, **kwargs)[source]
Parameters:

\ No newline at end of file diff --git a/docs/build/data.html b/docs/build/data.html index 0a9055c..3b84ef1 100644 --- a/docs/build/data.html +++ b/docs/build/data.html @@ -1,19 +1,16 @@ - - + - + Data Loading — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -45,10 +42,10 @@

Navigation

-

Data Loading

+

Data Loading

-class PDBData(filename=None, fix_terminal=False, atoms=None)[source]
+class PDBData(filename=None, fix_terminal=False, atoms=None)[source]

Create object enabling the manipulation of multi-PDB files into a dataset suitable for training.

Parameters:
@@ -61,7 +58,7 @@

Data Loading
-atomselect(atoms, ignore_atoms=[])[source]
+atomselect(atoms, ignore_atoms=[])[source]

From all imported PDBs, extract only atoms of interest. import_pdb must have been called at least once, either at class instantiation or as a separate call.

@@ -73,25 +70,25 @@

Data Loading
-fix_terminal()[source]
+fix_terminal()[source]

Rename OT1 N-terminal Oxygen to O if terminal oxygens are named OT1 and OT2 otherwise no oxygen will be selected during an atomselect using atoms = [‘CA’, ‘C’,’N’,’O’,’CB’]. No template will be found for terminal residue in openmm_loss. Alternative solution is to use atoms = [‘CA’, ‘C’, ‘N’, ‘O’, ‘CB’, ‘OT1’]. instead.

-frame()[source]
+frame()[source]

return biobox.Molecule object with loaded data

-get_atominfo()[source]
+get_atominfo()[source]

generate list of all atoms in dataset, where every line contains [atom name, residue name, resid]

-get_dataloader(batch_size, validation_split=0.1, pin_memory=True, dataset_sample_size=- 1, manual_seed=None, shuffle=True, sampler=None)[source]
+get_dataloader(batch_size, validation_split=0.1, pin_memory=True, dataset_sample_size=-1, manual_seed=None, shuffle=True, sampler=None)[source]
Parameters:

\ No newline at end of file diff --git a/docs/build/faq.html b/docs/build/faq.html index 987b10d..4d28ec4 100644 --- a/docs/build/faq.html +++ b/docs/build/faq.html @@ -1,19 +1,16 @@ - - + - + Frequently Asked Questions — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -41,33 +38,23 @@

Navigation

-

Frequently Asked Questions

-
-

I get an IndexError when I try loading a multiPDB

-

This is likely an error thrown by MDAnalysis. Typically this happens when -attempting to load a multiPDB file saved with software like VMD, which uses a -different syntax to indicate the end of a conformer in the file. A way to get -around this, is to re-save the file in a format MDAnalysis can parse, e.g., by -loading and re-saving the file via biobox.

-
import biobox as bb
-M = bb.Molecule(filename)
-M.write_pdb(newfilename)
-
-
-
+

Frequently Asked Questions

-

I cannot install openmmtorchplugin

+

I cannot install openmmtorchplugin

openmmtorchplugin depends on conda-forge builds of pyTorch and OpenMM. -Due to this dependency, Windows cannot be supported. -Installation can be carried out via terminal with conda-forge:

+Due to this dependency, Windows cannot be supported.

+

Installation can be carried out via terminal with conda-forge:

conda install -c conda-forge openmmtorchplugin
 
-

The following Python versions are supported: 3.8, 3.9, 3.10, 3.11. -If you are running into any issue, attempt a fresh install in a new conda +

The following Python versions are supported: 3.8, 3.9, 3.10, 3.11.

+

If you run into any issue, either at installation or runtime, ensure you have a +plugin version >=1.1.3, as previous ones have known compatibility issues with OpenMM. +The easiest way to ensure the most up to date version of molearn and the +openmmtorchplugin are installed, is to run a fresh install in a new conda environment:

conda create --name test_env python=3.10
-conda install -c conda-forge openmmtorchplugin molearn
+conda install -n test_env -c conda-forge openmmtorchplugin molearn
 

openmmtorchplugin is built with cuda_compiler_version=11.2 in conda-forge CI tools. @@ -76,8 +63,21 @@

I cannot install openmmtorchpluginNVIDIA CUDA Toolkit Minimum driver versions

+
+

I get an IndexError when I try loading a multiPDB

+

This is likely an error thrown by MDAnalysis. Typically this happens when +attempting to load a multiPDB file saved with software like VMD, which uses a +different syntax to indicate the end of a conformer in the file. A way to get +around this, is to re-save the file in a format MDAnalysis can parse, e.g., by +loading and re-saving the file via biobox.

+
import biobox as bb
+M = bb.Molecule(filename)
+M.write_pdb(newfilename)
+
+
+
-

The GUI freezes when I use it/does not work as expected

+

The GUI freezes when I use it/does not work as expected

This is usually caused by an issue with packages handling communications between the GUI and Jupyter, see here. Currently, a workaround is to use older versions of tornado. In Python 3.10, the following packages have been observed to yield correct behaviour:

@@ -100,8 +100,8 @@

The GUI freezes when I use it/does not work as expectedTable of Contents

\ No newline at end of file diff --git a/docs/build/genindex.html b/docs/build/genindex.html index 547fb83..5116c17 100644 --- a/docs/build/genindex.html +++ b/docs/build/genindex.html @@ -1,18 +1,15 @@ - - + Index — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -451,8 +448,8 @@

Navigation

\ No newline at end of file diff --git a/docs/build/index.html b/docs/build/index.html index fef38e3..55a26aa 100644 --- a/docs/build/index.html +++ b/docs/build/index.html @@ -1,19 +1,16 @@ - - + - + Welcome to molearn’s documentation! — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -41,7 +38,7 @@

Navigation

-

Welcome to molearn’s documentation!

+

Welcome to molearn’s documentation!

molearn is a Python package streamlining the implementation of machine learning models dedicated to the generation of protein conformations from example data obtained via experiment or molecular simulation.

@@ -51,16 +48,52 @@

Welcome to molearn’s documentation!

Contents:

\ No newline at end of file diff --git a/docs/build/loss_functions.html b/docs/build/loss_functions.html index c9e8f02..72c7bd3 100644 --- a/docs/build/loss_functions.html +++ b/docs/build/loss_functions.html @@ -1,19 +1,16 @@ - - + - + Loss Functions — molearn 2.0.1 documentation - - - - - - - + + + + + @@ -45,10 +42,10 @@

Navigation

-

Loss Functions

+

Loss Functions

-class ModifiedForceField(*args, alternative_residue_names=None, **kwargs)[source]
+class ModifiedForceField(*args, alternative_residue_names=None, **kwargs)[source]

Takes all *args and **kwargs of openmm.app.ForceField, plus an optional parameter described here.

Parameters:
@@ -59,7 +56,7 @@

Navigation

-class OpenMMPluginScoreSoftForceField(mol=None, platform='CUDA', atoms=['CA', 'C', 'N', 'CB', 'O'])[source]
+class OpenMMPluginScoreSoftForceField(mol=None, platform='CUDA', atoms=['CA', 'C', 'N', 'CB', 'O'])[source]
Parameters:
    @@ -77,7 +74,7 @@

    Navigation

    -class OpenmmPluginScore(mol=None, xml_file=['amber14-all.xml'], platform='CUDA', remove_NB=False, alternative_residue_names={'HIS': 'HIE', 'HSE': 'HIE'}, atoms=['CA', 'C', 'N', 'CB', 'O'], soft=False)[source]
    +class OpenmmPluginScore(mol=None, xml_file=['amber14-all.xml'], platform='CUDA', remove_NB=False, alternative_residue_names={'HIS': 'HIE', 'HSE': 'HIE'}, atoms=['CA', 'C', 'N', 'CB', 'O'], soft=False)[source]

    This will use the new OpenMM Plugin to calculate forces and energy. The intention is that this will be fast enough to be able to calculate forces and energy during training. N.B.: The current torchintegratorplugin only supports float on GPU and double on CPU.

    @@ -95,7 +92,7 @@

    Navigation

    -execute(x)[source]
    +execute(x)[source]
    Parameters:

    x (torch.Tensor) – shape [b, N, 3]. dtype=float. device = gpu

    @@ -105,7 +102,7 @@

    Navigation

    -get_energy(pos_ptr, force_ptr, energy_ptr, n_particles, batch_size)[source]
    +get_energy(pos_ptr, force_ptr, energy_ptr, n_particles, batch_size)[source]
    Parameters:
      @@ -123,7 +120,7 @@

      Navigation

      -class OpenmmTorchEnergyMinimizer(mol=None, xml_file=['amber14-all.xml'], platform='CUDA', remove_NB=False, alternative_residue_names={'HIS': 'HIE', 'HSE': 'HIE'}, atoms=['CA', 'C', 'N', 'CB', 'O'], soft=False)[source]
      +class OpenmmTorchEnergyMinimizer(mol=None, xml_file=['amber14-all.xml'], platform='CUDA', remove_NB=False, alternative_residue_names={'HIS': 'HIE', 'HSE': 'HIE'}, atoms=['CA', 'C', 'N', 'CB', 'O'], soft=False)[source]
      Parameters:
        @@ -141,10 +138,10 @@

        Navigation

        -class openmm_clamped_energy_function(*args, **kwargs)[source]
        +class openmm_clamped_energy_function(*args, **kwargs)[source]
        -static backward(ctx, grad_output)[source]
        +static backward(ctx, grad_output)[source]

        Defines a formula for differentiating the operation with backward mode automatic differentiation (alias to the vjp function).

        This function is to be overridden by all subclasses.

        @@ -166,7 +163,7 @@

        Navigation

        -static forward(ctx, plugin, x, clamp)[source]
        +static forward(ctx, plugin, x, clamp)[source]
        Parameters:
          @@ -184,16 +181,16 @@

          Navigation

          -class openmm_energy(mol, std, clamp=None, **kwargs)[source]
          +class openmm_energy(mol, std, clamp=None, **kwargs)[source]

          Initializes internal Module state, shared by both nn.Module and ScriptModule.

          -class openmm_energy_function(*args, **kwargs)[source]
          +class openmm_energy_function(*args, **kwargs)[source]
          -static backward(ctx, grad_output)[source]
          +static backward(ctx, grad_output)[source]

          Defines a formula for differentiating the operation with backward mode automatic differentiation (alias to the vjp function).

          This function is to be overridden by all subclasses.

          @@ -215,7 +212,7 @@

          Navigation

          -static forward(ctx, plugin, x)[source]
          +static forward(ctx, plugin, x)[source]
          Parameters:
            @@ -240,6 +237,34 @@

            Navigation