From 13a5df0fcbf13852da5613cefd84708e1fd506c6 Mon Sep 17 00:00:00 2001 From: Yuxuan Zhuang Date: Sat, 10 Apr 2021 12:37:07 +0200 Subject: [PATCH] New TransformationBase class (#2950) Fixes #2996 ## Work done in this PR - Adds a TransformationBase class - TransformationBase uses threadpoolctl to allow the number of threads used to be limited in order to improve performance. - TransformationBase also includes a check for whether a transformation is parallelizable. - Refactors existing transformation classes to use TransformationBase. --- .github/workflows/gh-ci.yaml | 2 +- .travis.yml | 4 +- azure-pipelines.yml | 1 + package/CHANGELOG | 3 + .../MDAnalysis/transformations/__init__.py | 1 + package/MDAnalysis/transformations/base.py | 124 ++++++++++++++++++ .../transformations/boxdimensions.py | 13 +- package/MDAnalysis/transformations/fit.py | 42 ++++-- .../transformations/positionaveraging.py | 19 ++- package/MDAnalysis/transformations/rotate.py | 21 ++- .../MDAnalysis/transformations/translate.py | 34 ++++- package/MDAnalysis/transformations/wrap.py | 27 +++- .../trajectory_transformations.rst | 49 ++++++- .../transformations/base.rst | 1 + package/requirements.txt | 1 + package/setup.py | 1 + .../transformations/test_base.py | 104 +++++++++++++++ 17 files changed, 407 insertions(+), 40 deletions(-) create mode 100644 package/MDAnalysis/transformations/base.py create mode 100644 package/doc/sphinx/source/documentation_pages/transformations/base.rst create mode 100644 testsuite/MDAnalysisTests/transformations/test_base.py diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml index 954a3852b67..9398d0f722c 100644 --- a/.github/workflows/gh-ci.yaml +++ b/.github/workflows/gh-ci.yaml @@ -14,7 +14,7 @@ defaults: shell: bash -l {0} env: - MDA_CONDA_MIN_DEPS: "pip pytest==6.1.2 mmtf-python biopython networkx cython matplotlib-base scipy griddataformats hypothesis gsd codecov" + MDA_CONDA_MIN_DEPS: "pip pytest==6.1.2 mmtf-python biopython networkx cython matplotlib-base scipy griddataformats hypothesis gsd codecov threadpoolctl" MDA_CONDA_EXTRA_DEPS: "seaborn>=0.7.0 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12 chemfiles tqdm>=4.43.0 tidynamics>=1.0.0 rdkit>=2020.03.1 h5py" MDA_PIP_MIN_DEPS: 'coveralls coverage<5 pytest-cov pytest-xdist' MDA_PIP_EXTRA_DEPS: 'duecredit parmed' diff --git a/.travis.yml b/.travis.yml index 3be2f063d1b..14b77d49953 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,7 +25,7 @@ matrix: - conda activate base - conda update --yes conda - conda install --yes pip pytest==6.1.2 mmtf-python biopython networkx cython matplotlib-base "scipy>=1.6" griddataformats hypothesis gsd codecov -c conda-forge -c biobuilds - - pip install pytest-xdist + - pip install pytest-xdist threadpoolctl - conda info install: - (cd package/ && python setup.py develop) && (cd testsuite/ && python setup.py install) @@ -45,7 +45,7 @@ matrix: if: type = cron before_install: - python -m pip install cython numpy scipy - - python -m pip install --no-build-isolation hypothesis matplotlib pytest pytest-cov pytest-xdist tqdm + - python -m pip install --no-build-isolation hypothesis matplotlib pytest pytest-cov pytest-xdist tqdm threadpoolctl install: - cd package - python setup.py install diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 49abbd832fa..fde17babf5e 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -59,6 +59,7 @@ jobs: scipy h5py tqdm + threadpoolctl displayName: 'Install dependencies' # TODO: recent rdkit is not on PyPI - script: >- diff --git a/package/CHANGELOG b/package/CHANGELOG index 7f1ce2030ff..a099fc7a06a 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -156,6 +156,8 @@ Enhancements 'protein' selection (#2751 PR #2755) * Added an RDKit converter that works for any input with all hydrogens explicit in the topology (Issue #2468, PR #2775) + * Added `TransformationBase`. Implemented threadlimit and add an attribute for + checking if it can be used in parallel analysis. (Issue #2996, PR #2950) Changes * Fixed inaccurate docstring inside the RMSD class (Issue #2796, PR #3134) @@ -198,6 +200,7 @@ Changes * deprecated ``analysis.helanal`` module has been removed in favour of ``analysis.helix_analysis`` (PR #2929) * Move water bridge analysis from hbonds to hydrogenbonds (Issue #2739 PR #2913) + * `threadpoolctl` is required for installation (Issue #2860) Deprecations diff --git a/package/MDAnalysis/transformations/__init__.py b/package/MDAnalysis/transformations/__init__.py index 7fd56d2eab6..e9508cddb35 100644 --- a/package/MDAnalysis/transformations/__init__.py +++ b/package/MDAnalysis/transformations/__init__.py @@ -125,6 +125,7 @@ def wrapped(ts): method instead of being written as a function/closure. """ +from .base import TransformationBase from .translate import translate, center_in_box from .rotate import rotateby from .positionaveraging import PositionAverager diff --git a/package/MDAnalysis/transformations/base.py b/package/MDAnalysis/transformations/base.py new file mode 100644 index 00000000000..497ab5937eb --- /dev/null +++ b/package/MDAnalysis/transformations/base.py @@ -0,0 +1,124 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# + +"""\ +Transformations Base Class --- :mod:`MDAnalysis.transformations.base` +===================================================================== + +.. autoclass:: TransformationBase + +""" +from threadpoolctl import threadpool_limits + + +class TransformationBase(object): + """Base class for defining on-the-fly transformations + + The class is designed as a template for creating on-the-fly + Transformation classes. This class will + + 1) set up a context manager framework on limiting the threads + per call, which may be the multi-thread OpenBlas backend of NumPy. + This backend may kill the performance when subscribing hyperthread + or oversubscribing the threads when used together with other parallel + engines e.g. Dask. + (PR `#2950 `_) + + Define ``max_threads=1`` when that is the case. + + 2) set up a boolean attribute `parallelizable` for checking if the + transformation can be applied in a **split-apply-combine** parallelism. + For example, the :class:`~MDAnalysis.transformations.positionaveraging.PositionAverager` + is history-dependent and can not be used in parallel analysis natively. + (Issue `#2996 `_) + + To define a new Transformation, :class:`TransformationBase` + has to be subclassed. + ``max_threads`` will be set to ``None`` by default, + i.e. does not do anything and any settings in the environment such as + the environment variable :envvar:`OMP_NUM_THREADS` + (see the `OpenMP specification for OMP_NUM_THREADS `_) + are used. + ``parallelizable`` will be set to ``True`` by default. + You may need to double check if it can be used in parallel analysis; + if not, override the value to ``False``. + Note this attribute is not checked anywhere in MDAnalysis yet. + Developers of the parallel analysis have to check it in their own code. + + .. code-block:: python + + class NewTransformation(TransformationBase): + def __init__(self, ag, parameter, + max_threads=1, parallelizable=True): + super().__init__(max_threads=max_threads, + parallelizable=parallelizable) + self.ag = ag + self._param = parameter + + def _transform(self, ts): + # REQUIRED + ts.positions = some_function(ts, self.ag, self._param) + return ts + + Afterwards the new transformation can be run like this. + + .. code-block:: python + + new_transformation = NewTransformation(ag, param) + u.trajectory.add_transformations(new_transformation) + + + .. versionadded:: 2.0.0 + Add the base class for all transformations to limit threads and + check if it can be used in parallel analysis. + """ + + def __init__(self, **kwargs): + """ + Parameters + ---------- + max_threads: int, optional + The maximum thread number can be used. + Default is ``None``, which means the default or the external setting. + parallelizable: bool, optional + A check for if this can be used in split-apply-combine parallel + analysis approach. + Default is ``True``. + """ + self.max_threads = kwargs.pop('max_threads', None) + self.parallelizable = kwargs.pop('parallelizable', True) + + def __call__(self, ts): + """The function that makes transformation can be called as a function + + The thread limit works as a context manager with given `max_threads` + wrapping the real :func:`_transform` function + """ + with threadpool_limits(self.max_threads): + return self._transform(ts) + + def _transform(self, ts): + """Transform the given `Timestep` + + It deals with the transformation of a single `Timestep`. + """ + raise NotImplementedError("Only implemented in child classes") diff --git a/package/MDAnalysis/transformations/boxdimensions.py b/package/MDAnalysis/transformations/boxdimensions.py index 5a6569caa90..ad6c9bf52c5 100644 --- a/package/MDAnalysis/transformations/boxdimensions.py +++ b/package/MDAnalysis/transformations/boxdimensions.py @@ -32,8 +32,10 @@ """ import numpy as np +from .base import TransformationBase -class set_dimensions: + +class set_dimensions(TransformationBase): """ Set simulation box dimensions. @@ -63,7 +65,12 @@ class set_dimensions: :class:`~MDAnalysis.coordinates.base.Timestep` object """ - def __init__(self, dimensions): + def __init__(self, + dimensions, + max_threads=None, + parallelizable=True): + super().__init__(max_threads=max_threads, + parallelizable=parallelizable) self.dimensions = dimensions try: @@ -82,6 +89,6 @@ def __init__(self, dimensions): '[a, b, c, alpha, beta, gamma]') raise ValueError(errmsg) - def __call__(self, ts): + def _transform(self, ts): ts.dimensions = self.dimensions return ts diff --git a/package/MDAnalysis/transformations/fit.py b/package/MDAnalysis/transformations/fit.py index 85a2c93e615..98ace818c96 100644 --- a/package/MDAnalysis/transformations/fit.py +++ b/package/MDAnalysis/transformations/fit.py @@ -37,8 +37,10 @@ from ..analysis import align from ..lib.transformations import euler_from_matrix, euler_matrix +from .base import TransformationBase -class fit_translation(object): + +class fit_translation(TransformationBase): """Translates a given AtomGroup so that its center of geometry/mass matches the respective center of the given reference. A plane can be given by the user using the option `plane`, and will result in the removal of @@ -82,10 +84,17 @@ class fit_translation(object): .. versionchanged:: 2.0.0 - The transformation was changed from a function/closure to a class - with ``__call__``. + The transformation was changed from a function/closure to a class + with ``__call__``. + .. versionchanged:: 2.0.0 + The transformation was changed to inherit from the base class for + limiting threads and checking if it can be used in parallel analysis. """ - def __init__(self, ag, reference, plane=None, weights=None): + def __init__(self, ag, reference, plane=None, weights=None, + max_threads=None, parallelizable=True): + super().__init__(max_threads=max_threads, + parallelizable=parallelizable) + self.ag = ag self.reference = reference self.plane = plane @@ -117,7 +126,7 @@ def __init__(self, ag, reference, plane=None, weights=None): self.weights = align.get_weights(self.ref.atoms, weights=self.weights) self.ref_com = self.ref.center(self.weights) - def __call__(self, ts): + def _transform(self, ts): mobile_com = np.asarray(self.mobile.atoms.center(self.weights), np.float32) vector = self.ref_com - mobile_com @@ -128,7 +137,7 @@ def __call__(self, ts): return ts -class fit_rot_trans(object): +class fit_rot_trans(TransformationBase): """Perform a spatial superposition by minimizing the RMSD. Spatially align the group of atoms `ag` to `reference` by doing a RMSD @@ -140,6 +149,11 @@ class fit_rot_trans(object): removed. This is useful for protein-membrane systems to where the membrane must remain in the same orientation. + Note + ---- + ``max_threads`` is set to 1 for this transformation + with which it performs better. + Example ------- Removing the translations and rotations of a given AtomGroup `ag` on the XY plane @@ -174,8 +188,20 @@ class fit_rot_trans(object): Returns ------- MDAnalysis.coordinates.base.Timestep + + + .. versionchanged:: 2.0.0 + The transformation was changed from a function/closure to a class + with ``__call__``. + .. versionchanged:: 2.0.0 + The transformation was changed to inherit from the base class for + limiting threads and checking if it can be used in parallel analysis. """ - def __init__(self, ag, reference, plane=None, weights=None): + def __init__(self, ag, reference, plane=None, weights=None, + max_threads=1, parallelizable=True): + super().__init__(max_threads=max_threads, + parallelizable=parallelizable) + self.ag = ag self.reference = reference self.plane = plane @@ -207,7 +233,7 @@ def __init__(self, ag, reference, plane=None, weights=None): self.ref_com = self.ref.center(self.weights) self.ref_coordinates = self.ref.atoms.positions - self.ref_com - def __call__(self, ts): + def _transform(self, ts): mobile_com = self.mobile.atoms.center(self.weights) mobile_coordinates = self.mobile.atoms.positions - mobile_com rotation, dump = align.rotation_matrix(mobile_coordinates, diff --git a/package/MDAnalysis/transformations/positionaveraging.py b/package/MDAnalysis/transformations/positionaveraging.py index 930a77774b3..57b4d66465c 100644 --- a/package/MDAnalysis/transformations/positionaveraging.py +++ b/package/MDAnalysis/transformations/positionaveraging.py @@ -35,8 +35,10 @@ import numpy as np import warnings +from .base import TransformationBase -class PositionAverager(object): + +class PositionAverager(TransformationBase): """ Averages the coordinates of a given timestep so that the coordinates of the AtomGroup correspond to the average positions of the N previous @@ -132,11 +134,18 @@ class PositionAverager(object): Returns ------- MDAnalysis.coordinates.base.Timestep - - + + + .. versionchanged:: 2.0.0 + The transformation was changed to inherit from the base class for + limiting threads and checking if it can be used in parallel analysis. """ - def __init__(self, avg_frames, check_reset=True): + def __init__(self, avg_frames, check_reset=True, + max_threads=None, + parallelizable=False): + super().__init__(max_threads=max_threads, + parallelizable=parallelizable) self.avg_frames = avg_frames self.check_reset = check_reset self.current_avg = 0 @@ -162,7 +171,7 @@ def rollposx(self, ts): self.coord_array = np.roll(self.coord_array, 1, axis=2) self.coord_array[..., 0] = ts.positions.copy() - def __call__(self, ts): + def _transform(self, ts): # calling the same timestep will not add new data to coord_array # This can prevent from getting different values when # call `u.trajectory[i]` multiple times. diff --git a/package/MDAnalysis/transformations/rotate.py b/package/MDAnalysis/transformations/rotate.py index afbba495a61..0c9bd80fc96 100644 --- a/package/MDAnalysis/transformations/rotate.py +++ b/package/MDAnalysis/transformations/rotate.py @@ -37,14 +37,21 @@ from ..lib.transformations import rotation_matrix from ..lib.util import get_weights +from .base import TransformationBase -class rotateby(object): + +class rotateby(TransformationBase): ''' Rotates the trajectory by a given angle on a given axis. The axis is defined by the user, combining the direction vector and a point. This point can be the center of geometry or the center of mass of a user defined AtomGroup, or an array defining custom coordinates. + Note + ---- + ``max_threads`` is set to 1 for this transformation + with which it performs better. + Examples -------- @@ -111,6 +118,9 @@ class rotateby(object): .. versionchanged:: 2.0.0 The transformation was changed from a function/closure to a class with ``__call__``. + .. versionchanged:: 2.0.0 + The transformation was changed to inherit from the base class for + limiting threads and checking if it can be used in parallel analysis. ''' def __init__(self, angle, @@ -118,7 +128,12 @@ def __init__(self, point=None, ag=None, weights=None, - wrap=False): + wrap=False, + max_threads=1, + parallelizable=True): + super().__init__(max_threads=max_threads, + parallelizable=parallelizable) + self.angle = angle self.direction = direction self.point = point @@ -162,7 +177,7 @@ def __init__(self, else: raise ValueError('A point or an AtomGroup must be specified') - def __call__(self, ts): + def _transform(self, ts): if self.point is None: position = self.center_method() else: diff --git a/package/MDAnalysis/transformations/translate.py b/package/MDAnalysis/transformations/translate.py index 1b1be2d92ed..a69d7757146 100644 --- a/package/MDAnalysis/transformations/translate.py +++ b/package/MDAnalysis/transformations/translate.py @@ -39,14 +39,18 @@ import numpy as np from functools import partial +from .base import TransformationBase -class translate(object): + +class translate(TransformationBase): """ Translates the coordinates of a given :class:`~MDAnalysis.coordinates.base.Timestep` instance by a given vector. Example ------- + .. code-block:: python + ts = MDAnalysis.transformations.translate([1,2,3])(ts) Parameters @@ -58,8 +62,19 @@ class translate(object): ------- :class:`~MDAnalysis.coordinates.base.Timestep` object + + .. versionchanged:: 2.0.0 + The transformation was changed from a function/closure to a class + with ``__call__``. + .. versionchanged:: 2.0.0 + The transformation was changed to inherit from the base class for + limiting threads and checking if it can be used in parallel analysis. """ - def __init__(self, vector): + def __init__(self, vector, + max_threads=None, parallelizable=True): + super().__init__(max_threads=max_threads, + parallelizable=parallelizable) + self.vector = vector if len(self.vector) > 2: @@ -67,12 +82,12 @@ def __init__(self, vector): else: raise ValueError("{} vector is too short".format(self.vector)) - def __call__(self, ts): + def _transform(self, ts): ts.positions += self.vector return ts -class center_in_box(object): +class center_in_box(TransformationBase): """ Translates the coordinates of a given :class:`~MDAnalysis.coordinates.base.Timestep` instance so that the center of geometry/mass of the given :class:`~MDAnalysis.core.groups.AtomGroup` @@ -111,8 +126,15 @@ class center_in_box(object): .. versionchanged:: 2.0.0 The transformation was changed from a function/closure to a class with ``__call__``. + .. versionchanged:: 2.0.0 + The transformation was changed to inherit from the base class for + limiting threads and checking if it can be used in parallel analysis. """ - def __init__(self, ag, center='geometry', point=None, wrap=False): + def __init__(self, ag, center='geometry', point=None, wrap=False, + max_threads=None, parallelizable=True): + super().__init__(max_threads=max_threads, + parallelizable=parallelizable) + self.ag = ag self.center = center self.point = point @@ -140,7 +162,7 @@ def __init__(self, ag, center='geometry', point=None, wrap=False): raise ValueError(f'{self.ag} is not an AtomGroup object') \ from None - def __call__(self, ts): + def _transform(self, ts): if self.point is None: boxcenter = np.sum(ts.triclinic_dimensions, axis=0) / 2 else: diff --git a/package/MDAnalysis/transformations/wrap.py b/package/MDAnalysis/transformations/wrap.py index 10e565fc072..1b3246b639e 100644 --- a/package/MDAnalysis/transformations/wrap.py +++ b/package/MDAnalysis/transformations/wrap.py @@ -36,8 +36,10 @@ from ..lib._cutil import make_whole +from .base import TransformationBase -class wrap(object): + +class wrap(TransformationBase): """ Shift the contents of a given AtomGroup back into the unit cell. :: @@ -84,17 +86,24 @@ class wrap(object): .. versionchanged:: 2.0.0 The transformation was changed from a function/closure to a class with ``__call__``. + .. versionchanged:: 2.0.0 + The transformation was changed to inherit from the base class for + limiting threads and checking if it can be used in parallel analysis. """ - def __init__(self, ag, compound='atoms'): + def __init__(self, ag, compound='atoms', + max_threads=None, parallelizable=True): + super().__init__(max_threads=max_threads, + parallelizable=parallelizable) + self.ag = ag self.compound = compound - def __call__(self, ts): + def _transform(self, ts): self.ag.wrap(compound=self.compound) return ts -class unwrap(object): +class unwrap(TransformationBase): """ Move all atoms in an AtomGroup so that bonds don't split over images @@ -138,8 +147,14 @@ class unwrap(object): .. versionchanged:: 2.0.0 The transformation was changed from a function/closure to a class with ``__call__``. + .. versionchanged:: 2.0.0 + The transformation was changed to inherit from the base class for + limiting threads and checking if it can be used in parallel analysis. """ - def __init__(self, ag): + def __init__(self, ag, max_threads=None, parallelizable=True): + super().__init__(max_threads=max_threads, + parallelizable=parallelizable) + self.ag = ag try: @@ -147,7 +162,7 @@ def __init__(self, ag): except AttributeError: raise AttributeError("{} has no fragments".format(self.ag)) - def __call__(self, ts): + def _transform(self, ts): for frag in self.ag.fragments: make_whole(frag) return ts diff --git a/package/doc/sphinx/source/documentation_pages/trajectory_transformations.rst b/package/doc/sphinx/source/documentation_pages/trajectory_transformations.rst index 4d885cb0c45..aafc31976f8 100644 --- a/package/doc/sphinx/source/documentation_pages/trajectory_transformations.rst +++ b/package/doc/sphinx/source/documentation_pages/trajectory_transformations.rst @@ -90,7 +90,7 @@ being added. Creating transformations ------------------------ -A simple *transformation* can also be a function that takes a +A simple *transformation* can also be a function that takes a :class:`~MDAnalysis.coordinates.base.Timestep` as input, modifies it, and returns it. If it takes no other arguments but a :class:`Timestep` can be defined as the following example: @@ -113,17 +113,23 @@ the following two methods can be used to create such transformation: Creating complex transformation classes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -It is implemented by defining :func:`__call__` for the transformation class -and can be applied directly to a :class:`Timestep`. +It is implemented by inheriting from +:class:`MDAnalysis.transformations.base.TransformationBase`, +which defines :func:`__call__` for the transformation class +and can be applied directly to a :class:`Timestep`. :func:`_transform` has to +be defined and include the operations on the :class:`MDAnalysis.coordinates.base.Timestep`. + So, a transformation class can be roughly defined as follows: .. code-block:: python - class up_by_x_class(object): + from MDAnalysis.transformations import TransformationBase + + class up_by_x_class(TransformationBase): def __init__(self, distance): self.distance = distance - def __call__(self, ts): + def _transform(self, ts): ts.positions = ts.positions + np.array([0, 0, self.distance], dtype=np.float32) return ts @@ -185,7 +191,14 @@ any of these transformations, the module must first be imported: import MDAnalysis.transformations -A workflow can then be added to a trajectory as described above. +A workflow can then be added to a trajectory as described above. Notably, +the parameter `max_threads` can be defined when creating a transformation +instance to limit the maximum threads. +(See :class:`MDAnalysis.transformations.base.TransformationBase` for more details) +Whether a specific transformation can be used along with parallel analysis +can be assessed by checking its +:attr:`~MDAnalysis.transformations.base.TransformationBase.parallelizable` +attribute. See :ref:`implemented-transformations` for more on the existing transformations in :mod:`MDAnalysis.transformations`. @@ -221,6 +234,30 @@ Giving a workflow as a keyword argument when defining the universe: u = MDAnalysis.Universe(topology, trajectory, transformations=workflow) +.. _building-block-transformation: + +Building blocks for Transformation Classes +------------------------------------------ +Transformations normally ultilize the power of NumPy to get better performance +on array operations. However, when it comes to parallelism, NumPy will sometimes +oversubscribe the threads, either by hyper threading (when it uses OpenBlas backend), +or by working with other parallel engines (e.g. Dask). + +In MDAnalysis, we use `threadpoolctl `_ +inside :class:`~MDAnalysis.transformations.base.TransformationBase` to control the maximum threads for transformations. + +It is also possible to apply a global thread limit by setting the external environmental +varibale, e.g. :code:`OMP_NUM_THREADS=1 MKL_NUM_THREADS=1 OPENBLAS_NUM_THREADS=1 +BLIS_NUM_THREADS=1 python script.py`. Read more about parallelism and resource management +in `scikit-learn documentations `_. + +Users are advised to benchmark code because interaction between different +libraries can lead to sub-optimal performance with defaults. + +.. toctree:: + + ./transformations/base + .. _implemented-transformations: Currently implemented transformations diff --git a/package/doc/sphinx/source/documentation_pages/transformations/base.rst b/package/doc/sphinx/source/documentation_pages/transformations/base.rst new file mode 100644 index 00000000000..d5a0377540d --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/transformations/base.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.transformations.base diff --git a/package/requirements.txt b/package/requirements.txt index 684bce83d01..54e993f063d 100644 --- a/package/requirements.txt +++ b/package/requirements.txt @@ -20,4 +20,5 @@ seaborn>=0.7.0,<=0.9 sphinx==1.8.5 sphinx_rtd_theme sphinx_sitemap +threadpoolctl tqdm diff --git a/package/setup.py b/package/setup.py index 474b6ff2eda..bdbe9ade8a7 100755 --- a/package/setup.py +++ b/package/setup.py @@ -584,6 +584,7 @@ def long_description(readme): 'scipy>=1.0.0', 'matplotlib>=1.5.1', 'tqdm>=4.43.0', + 'threadpoolctl', ] if not os.name == 'nt': install_requires.append('gsd>=1.4.0') diff --git a/testsuite/MDAnalysisTests/transformations/test_base.py b/testsuite/MDAnalysisTests/transformations/test_base.py new file mode 100644 index 00000000000..acf76ee4df8 --- /dev/null +++ b/testsuite/MDAnalysisTests/transformations/test_base.py @@ -0,0 +1,104 @@ + +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# +import numpy as np +import pytest +from numpy.testing import assert_equal +from threadpoolctl import threadpool_info + +import MDAnalysis as mda +from MDAnalysisTests.datafiles import PSF, DCD +from MDAnalysis.transformations.base import TransformationBase + + +class DefaultTransformation(TransformationBase): + """Default values for max_threads and parallelizable""" + def __init__(self): + super().__init__() + + def _transform(self, ts): + self.runtime_info = threadpool_info() + ts.positions = ts.positions + 1 + return ts + + +class NoTransform_Transformation(TransformationBase): + """Default values for max_threads and parallelizable""" + def __init__(self): + super().__init__() + + +class CustomTransformation(TransformationBase): + """Custom value for max_threads and parallelizable""" + def __init__(self, max_threads=1, parallelizable=False): + super().__init__(max_threads=max_threads, + parallelizable=parallelizable) + + def _transform(self, ts): + self.runtime_info = threadpool_info() + ts.positions = ts.positions + 1 + return ts + + +@pytest.fixture(scope='module') +def u(): + return mda.Universe(PSF, DCD) + + +def test_default_value(): + new_trans = DefaultTransformation() + assert new_trans.max_threads is None + assert new_trans.parallelizable is True + + +def test_no_transform_function(u): + new_trans = NoTransform_Transformation() + with pytest.raises(NotImplementedError, match=r"Only implemented"): + _ = new_trans._transform(u.trajectory.ts) + + +def test_custom_value(): + new_trans = CustomTransformation() + assert new_trans.max_threads == 1 + assert new_trans.parallelizable is False + + +def test_setting_thread_limit_value(): + new_trans = CustomTransformation(max_threads=4) + assert new_trans.max_threads == 4 + + +def test_thread_limit_apply(u): + default_thread_info = threadpool_info() + default_num_thread_limit_list = [thread_info['num_threads'] + for thread_info in default_thread_info] + + new_trans = CustomTransformation(max_threads=2) + _ = new_trans(u.trajectory.ts) + for thread_info in new_trans.runtime_info: + assert thread_info['num_threads'] == 2 + + # test the thread limit is only applied locally. + new_thread_info = threadpool_info() + new_num_thread_limit_list = [thread_info['num_threads'] + for thread_info in new_thread_info] + assert_equal(default_num_thread_limit_list, new_num_thread_limit_list)