diff --git a/README.md b/README.md index bcf2444..7963f7d 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,9 @@ [![status](https://joss.theoj.org/papers/781a409020f1c37417067aef6fbc3217/status.svg)](https://joss.theoj.org/papers/781a409020f1c37417067aef6fbc3217) [![Documentation Status](https://readthedocs.org/projects/molearn/badge/?version=latest)](https://molearn.readthedocs.io/en/latest/?badge=latest) +[![DOI](https://zenodo.org/badge/145391811.svg)](https://zenodo.org/badge/latestdoi/145391811) + + *protein conformational spaces meet machine learning* @@ -42,15 +45,21 @@ To run the GUI: ## Installation ## +#### Anaconda installation from conda-forge #### + The most recent release can be obtained through Anaconda: `conda install molearn -c conda-forge` or the much faster `mamba install -c conda-forge molearn` +#### Clone the repo and manually install #### + Manual installation requires the following three steps: -* Clone the repository -* Install the necessary requirements with `mamba install -c conda-forge --only-deps molearn`. The option `--only-deps` will install the molearn dependencies but not molearn itself. +* Clone the repository `git clone https://github.com/Degiacomi-Lab/molearn.git` +* Install all required packages (see section *Dependencies > Required Packages*, above). The easiest way is by calling `mamba install -c conda-forge --only-deps molearn`, where the option `--only-deps` will install the molearn required dependencies but not molearn itself. Optionally, packages enabling additional molearn functionalities can also be installed. This has to be done manually (see links in *Dependencies > Optional Packages*). 
* Use pip to install molearn from within the molearn directory `python -m pip install .` +#### Using molearn without installation #### + Molearn can be used without installation by making sure the requirements above are met, and adding the `src` directory to your path at the beginning of every script, e.g.: ``` import sys @@ -58,18 +67,23 @@ sys.path.insert(0, 'path/to/molearn/src') import molearn ``` + + ## Usage ## * See example scripts in the `examples` folder. * Jupyter notebook tutorials describing the usage of a trained neural network are available [here](https://github.com/Degiacomi-Lab/molearn_notebook). * software API and a FAQ page are available at [molearn.readthedocs.io](https://molearn.readthedocs.io/). -## Reference ## +## References ## + +If you use `molearn` in your work, please cite: [S.C. Musson and M.T. Degiacomi (2023). Molearn: a Python package streamlining the design of generative models of biomolecular dynamics. Journal of Open Source Software, 8(89), 5523](https://doi.org/10.21105/joss.05523) -If you use molearn in your work, please cite: +Theory and benchmarks of a neural network training against protein conformational spaces are presented here: [V.K. Ramaswamy, S.C. Musson, C.G. Willcocks, M.T. Degiacomi (2021). Learning protein conformational space with convolutions and latent interpolations, Physical Review X 11](https://journals.aps.org/prx/abstract/10.1103/PhysRevX.11.011052) -## Contact ## +## Contributing ## -For any question please contact samuel.musson@durham.ac.uk +For information on how to report bugs, request new features, or contribute to the code, please see [CONTRIBUTING.md](CONTRIBUTING.md). +For any other question please contact matteo.t.degiacomi@durham.ac.uk. 
diff --git a/paper/paper.md b/paper/paper.md index 0900e67..0158418 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -17,7 +17,7 @@ authors: corresponding: true affiliation: 1 affiliations: - - name: Department of Physics, Durham University, UK + - name: Department of Physics, Durham University, United Kingdom index: 1 date: 09 May 2023 bibliography: paper.bib diff --git a/src/molearn/analysis/analyser.py b/src/molearn/analysis/analyser.py index 83bfe8d..39e1a0c 100644 --- a/src/molearn/analysis/analyser.py +++ b/src/molearn/analysis/analyser.py @@ -21,7 +21,16 @@ print('Error importing modeller: ') print(e) -from ..scoring import Parallel_DOPE_Score, Parallel_Ramachandran_Score +try: + from ..scoring import Parallel_DOPE_Score +except ImportError as e: + print('Import Error captured while trying to import Parallel_DOPE_Score, it is likely that you dont have Modeller installed') + print(e) +try: + from ..scoring import Parallel_Ramachandran_Score +except ImportError as e: + print('Import Error captured while trying to import Parallel_Ramachandran_Score, it is likely that you dont have cctbx/iotbx installed') + print(e) from ..data import PDBData from ..utils import as_numpy diff --git a/src/molearn/data/pdb_data.py b/src/molearn/data/pdb_data.py index 36f4320..9a002b4 100644 --- a/src/molearn/data/pdb_data.py +++ b/src/molearn/data/pdb_data.py @@ -68,6 +68,10 @@ def atomselect(self, atoms, ignore_atoms=[]): _plain_atoms.append(self._mol.knowledge['atomtype'][a]) elif a[:-1] in self._mol.knowledge['atomtype']: _plain_atoms.append(self._mol.knowledge['atomtype'][a[:-1]]) + print(f'Could not find {a}. I am assuing you meant {a[:-1]} instead.') + elif a[:-2] in self._mol.knowledge['atomtype']: + _plain_atoms.append(self._mol.knowledge['atomtype'][a[:-2]]) + print(f'Could not find {a}. 
I am assuming you meant {a[:-2]} instead.') else: _plain_atoms.append(self._mol.knowledge['atomtype'][a]) # if above failed just raise the keyerror _atoms = [atom for atom, element in zip(_atoms, _plain_atoms) if element != 'H'] diff --git a/src/molearn/loss_functions/openmm_thread.py b/src/molearn/loss_functions/openmm_thread.py index 2f20c11..e38a874 100644 --- a/src/molearn/loss_functions/openmm_thread.py +++ b/src/molearn/loss_functions/openmm_thread.py @@ -14,6 +14,7 @@ import torch import numpy as np +from copy import deepcopy class ModifiedForceField(ForceField): @@ -193,6 +194,12 @@ def ignore_hydrogen(self): self.forcefield.registerPatch(patchData) def atomselect(self, atoms): + atoms = deepcopy(atoms) + if 'OT2' in atoms: + atoms.append('OXT') + if 'OT1' in atoms: + atoms.append('OXT') + for name, template in self.forcefield._templates.items(): patchData = ForceField._PatchData(name+'_leave_only_'+'_'.join(atoms), 1) diff --git a/src/molearn/scoring/__init__.py b/src/molearn/scoring/__init__.py index 0309778..309e273 100644 --- a/src/molearn/scoring/__init__.py +++ b/src/molearn/scoring/__init__.py @@ -1,15 +1,26 @@ """ `Scoring` holds classes for calculating DOPE and Ramachandran scores. """ +class RaiseErrorOnInit: + module = 'unknown module is creating an ImportError' + def __init__(self,*args, **kwargs): + raise ImportError(f'{self.module}. Therefore {self.__class__.__name__} can not be used') try: from .dope_score import Parallel_DOPE_Score, DOPE_Score except ImportError as e: import warnings warnings.warn(f"{e}. Modeller is probably not installed.") - + class DOPE_Score(RaiseErrorOnInit): + module = e + class Parallel_DOPE_Score(RaiseErrorOnInit): + module = e try: from .ramachandran_score import Parallel_Ramachandran_Score, Ramachandran_Score except Exception as e: + class Parallel_Ramachandran_Score(RaiseErrorOnInit): + module = e + class Ramachandran_Score(RaiseErrorOnInit): + module = e import warnings warnings.warn(f"{e}. 
Will not be able to calculate Ramachandran score.") \ No newline at end of file diff --git a/src/molearn/scoring/dope_score.py b/src/molearn/scoring/dope_score.py index 21fb2aa..918358e 100644 --- a/src/molearn/scoring/dope_score.py +++ b/src/molearn/scoring/dope_score.py @@ -19,6 +19,7 @@ class DOPE_Score: ''' This class contains methods to calculate dope without saving to save and load PDB files for every structure. Atoms in a biobox coordinate tensor are mapped to the coordinates in the modeller model directly. ''' + atom_map = {('ILE', 'CD1'):('ILE', 'CD')} def __init__(self, mol): ''' @@ -55,12 +56,20 @@ def __init__(self, mol): else: where_arg = (atom_residue==(np.array([j.name, j_residue_name, j.residue.index+offset], dtype=object))).all(axis=1) where = np.where(where_arg)[0] + if len(where)==0: + if (j_residue_name, j.name) in self.atom_map: + alt_residue_name, alt_name = self.atom_map[(j_residue_name, j.name)] + where_arg = (atom_residue==(np.array([alt_name, alt_residue_name, j.residue.index+offset], dtype=object))).all(axis=1) + where = np.where(where_arg)[0] + else: + print(f'Cant find {j.name} in the atoms {atom_residue[atom_residue[:,2]==j.residue.index+offset]} try adding a mapping to DOPE_Score.atom_map') atom_order.append(int(where)) self.fast_atom_order = atom_order # check fast dope atoms + reverse_map = {value:key for key, value in self.atom_map.items()} for i, j in enumerate(self.fast_ss): if i