From 13a5df0fcbf13852da5613cefd84708e1fd506c6 Mon Sep 17 00:00:00 2001
From: Yuxuan Zhuang <yuxuan.zhuang@dbb.su.se>
Date: Sat, 10 Apr 2021 12:37:07 +0200
Subject: [PATCH] New TransformationBase class (#2950)

Fixes #2996

## Work done in this PR
- Adds a TransformationBase class
- TransformationBase uses threadpoolctl to allow the number of threads used to be limited in order to improve performance.
- TransformationBase also includes a check for whether a transformation is parallelizable.
- Refactors existing transformation classes to use TransformationBase.
---
 .github/workflows/gh-ci.yaml                  |   2 +-
 .travis.yml                                   |   4 +-
 azure-pipelines.yml                           |   1 +
 package/CHANGELOG                             |   3 +
 .../MDAnalysis/transformations/__init__.py    |   1 +
 package/MDAnalysis/transformations/base.py    | 124 ++++++++++++++++++
 .../transformations/boxdimensions.py          |  13 +-
 package/MDAnalysis/transformations/fit.py     |  42 ++++--
 .../transformations/positionaveraging.py      |  19 ++-
 package/MDAnalysis/transformations/rotate.py  |  21 ++-
 .../MDAnalysis/transformations/translate.py   |  34 ++++-
 package/MDAnalysis/transformations/wrap.py    |  27 +++-
 .../trajectory_transformations.rst            |  49 ++++++-
 .../transformations/base.rst                  |   1 +
 package/requirements.txt                      |   1 +
 package/setup.py                              |   1 +
 .../transformations/test_base.py              | 104 +++++++++++++++
 17 files changed, 407 insertions(+), 40 deletions(-)
 create mode 100644 package/MDAnalysis/transformations/base.py
 create mode 100644 package/doc/sphinx/source/documentation_pages/transformations/base.rst
 create mode 100644 testsuite/MDAnalysisTests/transformations/test_base.py

diff --git a/.github/workflows/gh-ci.yaml b/.github/workflows/gh-ci.yaml
index 954a3852b67..9398d0f722c 100644
--- a/.github/workflows/gh-ci.yaml
+++ b/.github/workflows/gh-ci.yaml
@@ -14,7 +14,7 @@ defaults:
     shell: bash -l {0}
 
 env:
-  MDA_CONDA_MIN_DEPS: "pip pytest==6.1.2 mmtf-python biopython networkx cython matplotlib-base scipy griddataformats hypothesis gsd codecov"
+  MDA_CONDA_MIN_DEPS: "pip pytest==6.1.2 mmtf-python biopython networkx cython matplotlib-base scipy griddataformats hypothesis gsd codecov threadpoolctl"
   MDA_CONDA_EXTRA_DEPS: "seaborn>=0.7.0 clustalw=2.1 netcdf4 scikit-learn joblib>=0.12 chemfiles tqdm>=4.43.0 tidynamics>=1.0.0 rdkit>=2020.03.1 h5py"
   MDA_PIP_MIN_DEPS: 'coveralls coverage<5 pytest-cov pytest-xdist'
   MDA_PIP_EXTRA_DEPS: 'duecredit parmed'
diff --git a/.travis.yml b/.travis.yml
index 3be2f063d1b..14b77d49953 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -25,7 +25,7 @@ matrix:
         - conda activate base
         - conda update --yes conda
         - conda install --yes pip pytest==6.1.2 mmtf-python biopython networkx cython matplotlib-base "scipy>=1.6" griddataformats hypothesis gsd codecov -c conda-forge -c biobuilds
-        - pip install pytest-xdist
+        - pip install pytest-xdist threadpoolctl
         - conda info
       install:
         - (cd package/ && python setup.py develop) && (cd testsuite/ && python setup.py install)
@@ -45,7 +45,7 @@ matrix:
       if: type = cron
       before_install:
         - python -m pip install cython numpy scipy
-        - python -m pip install --no-build-isolation hypothesis matplotlib pytest pytest-cov pytest-xdist tqdm
+        - python -m pip install --no-build-isolation hypothesis matplotlib pytest pytest-cov pytest-xdist tqdm threadpoolctl
       install:
         - cd package
         - python setup.py install
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 49abbd832fa..fde17babf5e 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -59,6 +59,7 @@ jobs:
       scipy
       h5py
       tqdm
+      threadpoolctl
     displayName: 'Install dependencies'
   # TODO: recent rdkit is not on PyPI
   - script: >-
diff --git a/package/CHANGELOG b/package/CHANGELOG
index 7f1ce2030ff..a099fc7a06a 100644
--- a/package/CHANGELOG
+++ b/package/CHANGELOG
@@ -156,6 +156,8 @@ Enhancements
     'protein' selection (#2751 PR #2755)
   * Added an RDKit converter that works for any input with all hydrogens
     explicit in the topology (Issue #2468, PR #2775)
+  * Added `TransformationBase`. Implemented threadlimit and add an attribute for
+    checking if it can be used in parallel analysis. (Issue #2996, PR #2950)
 
 Changes
   * Fixed inaccurate docstring inside the RMSD class (Issue #2796, PR #3134) 
@@ -198,6 +200,7 @@ Changes
   * deprecated ``analysis.helanal`` module has been removed in favour of
     ``analysis.helix_analysis`` (PR #2929)
   * Move water bridge analysis from hbonds to hydrogenbonds (Issue #2739 PR #2913)
+  * `threadpoolctl` is required for installation (Issue #2860)
 
 Deprecations
 
diff --git a/package/MDAnalysis/transformations/__init__.py b/package/MDAnalysis/transformations/__init__.py
index 7fd56d2eab6..e9508cddb35 100644
--- a/package/MDAnalysis/transformations/__init__.py
+++ b/package/MDAnalysis/transformations/__init__.py
@@ -125,6 +125,7 @@ def wrapped(ts):
     method instead of being written as a function/closure.
 """
 
+from .base import TransformationBase
 from .translate import translate, center_in_box
 from .rotate import rotateby
 from .positionaveraging import PositionAverager
diff --git a/package/MDAnalysis/transformations/base.py b/package/MDAnalysis/transformations/base.py
new file mode 100644
index 00000000000..497ab5937eb
--- /dev/null
+++ b/package/MDAnalysis/transformations/base.py
@@ -0,0 +1,124 @@
+# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+#
+# MDAnalysis --- https://www.mdanalysis.org
+# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
+# (see the file AUTHORS for the full list of names)
+#
+# Released under the GNU Public Licence, v2 or any higher version
+#
+# Please cite your use of MDAnalysis in published work:
+#
+# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
+# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
+# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
+# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
+# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
+#
+# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
+# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
+# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
+#
+
+"""\
+Transformations Base Class --- :mod:`MDAnalysis.transformations.base`
+=====================================================================
+
+.. autoclass:: TransformationBase
+
+"""
+from threadpoolctl import threadpool_limits
+
+
+class TransformationBase(object):
+    """Base class for defining on-the-fly transformations
+
+    The class is designed as a template for creating on-the-fly
+    Transformation classes. This class will
+
+    1) set up a context manager framework on limiting the threads
+    per call, which may be the multi-thread OpenBlas backend of NumPy.
+    This backend may kill the performance when subscribing hyperthread
+    or oversubscribing the threads when used together with other parallel
+    engines e.g. Dask.
+    (PR `#2950 <https://github.com/MDAnalysis/mdanalysis/pull/2950>`_)
+
+    Define ``max_threads=1`` when that is the case.
+
+    2) set up a boolean attribute `parallelizable` for checking if the
+    transformation can be applied in a **split-apply-combine** parallelism.
+    For example, the :class:`~MDAnalysis.transformations.positionaveraging.PositionAverager`
+    is history-dependent and can not be used in parallel analysis natively.
+    (Issue `#2996 <https://github.com/MDAnalysis/mdanalysis/issues/2996>`_)
+
+    To define a new Transformation, :class:`TransformationBase`
+    has to be subclassed.
+    ``max_threads`` will be set to ``None`` by default,
+    i.e. does not do anything and any settings in the environment such as
+    the environment variable :envvar:`OMP_NUM_THREADS`
+    (see the `OpenMP specification for OMP_NUM_THREADS <https://www.openmp.org/spec-html/5.0/openmpse50.html>`_)
+    are used.
+    ``parallelizable`` will be set to ``True`` by default.
+    You may need to double check if it can be used in parallel analysis;
+    if not, override the value to ``False``.
+    Note this attribute is not checked anywhere in MDAnalysis yet.
+    Developers of the parallel analysis have to check it in their own code.
+
+    .. code-block:: python
+
+       class NewTransformation(TransformationBase):
+           def __init__(self, ag, parameter,
+                        max_threads=1, parallelizable=True):
+               super().__init__(max_threads=max_threads,
+                                 parallelizable=parallelizable)
+               self.ag = ag
+               self._param = parameter
+
+           def _transform(self, ts):
+               #  REQUIRED
+               ts.positions = some_function(ts, self.ag, self._param)
+               return ts
+
+    Afterwards the new transformation can be run like this.
+
+    .. code-block:: python
+
+       new_transformation = NewTransformation(ag, param)
+       u.trajectory.add_transformations(new_transformation)
+
+
+    .. versionadded:: 2.0.0
+       Add the base class for all transformations to limit threads and
+       check if it can be used in parallel analysis.
+    """
+
+    def __init__(self, **kwargs):
+        """
+        Parameters
+        ----------
+        max_threads: int, optional
+           The maximum thread number can be used.
+           Default is ``None``, which means the default or the external setting.
+        parallelizable: bool, optional
+           A check for if this can be used in split-apply-combine parallel
+           analysis approach.
+           Default is ``True``.
+        """
+        self.max_threads = kwargs.pop('max_threads', None)
+        self.parallelizable = kwargs.pop('parallelizable', True)
+
+    def __call__(self, ts):
+        """The function that makes transformation can be called as a function
+
+        The thread limit works as a context manager with given `max_threads`
+        wrapping the real :func:`_transform` function
+        """
+        with threadpool_limits(self.max_threads):
+            return self._transform(ts)
+
+    def _transform(self, ts):
+        """Transform the given `Timestep`
+
+        It deals with the transformation of a single `Timestep`.
+        """
+        raise NotImplementedError("Only implemented in child classes")
diff --git a/package/MDAnalysis/transformations/boxdimensions.py b/package/MDAnalysis/transformations/boxdimensions.py
index 5a6569caa90..ad6c9bf52c5 100644
--- a/package/MDAnalysis/transformations/boxdimensions.py
+++ b/package/MDAnalysis/transformations/boxdimensions.py
@@ -32,8 +32,10 @@
 """
 import numpy as np
 
+from .base import TransformationBase
 
-class set_dimensions:
+
+class set_dimensions(TransformationBase):
     """
     Set simulation box dimensions.
 
@@ -63,7 +65,12 @@ class set_dimensions:
     :class:`~MDAnalysis.coordinates.base.Timestep` object
     """
 
-    def __init__(self, dimensions):
+    def __init__(self,
+                 dimensions,
+                 max_threads=None,
+                 parallelizable=True):
+        super().__init__(max_threads=max_threads,
+                         parallelizable=parallelizable)
         self.dimensions = dimensions
 
         try:
@@ -82,6 +89,6 @@ def __init__(self, dimensions):
                        '[a, b, c, alpha, beta, gamma]')
             raise ValueError(errmsg)
 
-    def __call__(self, ts):
+    def _transform(self, ts):
         ts.dimensions = self.dimensions
         return ts
diff --git a/package/MDAnalysis/transformations/fit.py b/package/MDAnalysis/transformations/fit.py
index 85a2c93e615..98ace818c96 100644
--- a/package/MDAnalysis/transformations/fit.py
+++ b/package/MDAnalysis/transformations/fit.py
@@ -37,8 +37,10 @@
 from ..analysis import align
 from ..lib.transformations import euler_from_matrix, euler_matrix
 
+from .base import TransformationBase
 
-class fit_translation(object):
+
+class fit_translation(TransformationBase):
     """Translates a given AtomGroup so that its center of geometry/mass matches
     the respective center of the given reference. A plane can be given by the
     user using the option `plane`, and will result in the removal of
@@ -82,10 +84,17 @@ class fit_translation(object):
 
 
     .. versionchanged:: 2.0.0
-        The transformation was changed from a function/closure to a class
-        with ``__call__``.
+       The transformation was changed from a function/closure to a class
+       with ``__call__``.
+    .. versionchanged:: 2.0.0
+       The transformation was changed to inherit from the base class for
+       limiting threads and checking if it can be used in parallel analysis.
     """
-    def __init__(self, ag, reference, plane=None, weights=None):
+    def __init__(self, ag, reference, plane=None, weights=None,
+                 max_threads=None, parallelizable=True):
+        super().__init__(max_threads=max_threads,
+                         parallelizable=parallelizable)
+
         self.ag = ag
         self.reference = reference
         self.plane = plane
@@ -117,7 +126,7 @@ def __init__(self, ag, reference, plane=None, weights=None):
         self.weights = align.get_weights(self.ref.atoms, weights=self.weights)
         self.ref_com = self.ref.center(self.weights)
 
-    def __call__(self, ts):
+    def _transform(self, ts):
         mobile_com = np.asarray(self.mobile.atoms.center(self.weights),
                                 np.float32)
         vector = self.ref_com - mobile_com
@@ -128,7 +137,7 @@ def __call__(self, ts):
         return ts
 
 
-class fit_rot_trans(object):
+class fit_rot_trans(TransformationBase):
     """Perform a spatial superposition by minimizing the RMSD.
 
     Spatially align the group of atoms `ag` to `reference` by doing a RMSD
@@ -140,6 +149,11 @@ class fit_rot_trans(object):
     removed. This is useful for protein-membrane systems to where the membrane
     must remain in the same orientation.
 
+    Note
+    ----
+    ``max_threads`` is set to 1 for this transformation
+    with which it performs better.
+
     Example
     -------
     Removing the translations and rotations of a given AtomGroup `ag` on the XY plane
@@ -174,8 +188,20 @@ class fit_rot_trans(object):
     Returns
     -------
     MDAnalysis.coordinates.base.Timestep
+
+
+    .. versionchanged:: 2.0.0
+       The transformation was changed from a function/closure to a class
+       with ``__call__``.
+    .. versionchanged:: 2.0.0
+       The transformation was changed to inherit from the base class for
+       limiting threads and checking if it can be used in parallel analysis.
     """
-    def __init__(self, ag, reference, plane=None, weights=None):
+    def __init__(self, ag, reference, plane=None, weights=None,
+                 max_threads=1, parallelizable=True):
+        super().__init__(max_threads=max_threads,
+                         parallelizable=parallelizable)
+
         self.ag = ag
         self.reference = reference
         self.plane = plane
@@ -207,7 +233,7 @@ def __init__(self, ag, reference, plane=None, weights=None):
         self.ref_com = self.ref.center(self.weights)
         self.ref_coordinates = self.ref.atoms.positions - self.ref_com
 
-    def __call__(self, ts):
+    def _transform(self, ts):
         mobile_com = self.mobile.atoms.center(self.weights)
         mobile_coordinates = self.mobile.atoms.positions - mobile_com
         rotation, dump = align.rotation_matrix(mobile_coordinates,
diff --git a/package/MDAnalysis/transformations/positionaveraging.py b/package/MDAnalysis/transformations/positionaveraging.py
index 930a77774b3..57b4d66465c 100644
--- a/package/MDAnalysis/transformations/positionaveraging.py
+++ b/package/MDAnalysis/transformations/positionaveraging.py
@@ -35,8 +35,10 @@
 import numpy as np
 import warnings
 
+from .base import TransformationBase
 
-class PositionAverager(object):
+
+class PositionAverager(TransformationBase):
     """
     Averages the coordinates of a given timestep so that the coordinates
     of the AtomGroup correspond to the average positions of the N previous
@@ -132,11 +134,18 @@ class PositionAverager(object):
     Returns
     -------
     MDAnalysis.coordinates.base.Timestep
-    
-    
+
+
+    .. versionchanged:: 2.0.0
+       The transformation was changed to inherit from the base class for
+       limiting threads and checking if it can be used in parallel analysis.
     """
 
-    def __init__(self, avg_frames, check_reset=True):
+    def __init__(self, avg_frames, check_reset=True,
+                 max_threads=None,
+                 parallelizable=False):
+        super().__init__(max_threads=max_threads,
+                         parallelizable=parallelizable)
         self.avg_frames = avg_frames
         self.check_reset = check_reset
         self.current_avg = 0
@@ -162,7 +171,7 @@ def rollposx(self, ts):
         self.coord_array = np.roll(self.coord_array, 1, axis=2)
         self.coord_array[..., 0] = ts.positions.copy()
 
-    def __call__(self, ts):
+    def _transform(self, ts):
         #  calling the same timestep will not add new data to coord_array
         #  This can prevent from getting different values when
         #  call `u.trajectory[i]` multiple times.
diff --git a/package/MDAnalysis/transformations/rotate.py b/package/MDAnalysis/transformations/rotate.py
index afbba495a61..0c9bd80fc96 100644
--- a/package/MDAnalysis/transformations/rotate.py
+++ b/package/MDAnalysis/transformations/rotate.py
@@ -37,14 +37,21 @@
 from ..lib.transformations import rotation_matrix
 from ..lib.util import get_weights
 
+from .base import TransformationBase
 
-class rotateby(object):
+
+class rotateby(TransformationBase):
     '''
     Rotates the trajectory by a given angle on a given axis. The axis is defined by
     the user, combining the direction vector and a point. This point can be the center
     of geometry or the center of mass of a user defined AtomGroup, or an array defining
     custom coordinates.
 
+    Note
+    ----
+    ``max_threads`` is set to 1 for this transformation
+    with which it performs better.
+
     Examples
     --------
 
@@ -111,6 +118,9 @@ class rotateby(object):
     .. versionchanged:: 2.0.0
         The transformation was changed from a function/closure to a class
         with ``__call__``.
+    .. versionchanged:: 2.0.0
+       The transformation was changed to inherit from the base class for
+       limiting threads and checking if it can be used in parallel analysis.
     '''
     def __init__(self,
                  angle,
@@ -118,7 +128,12 @@ def __init__(self,
                  point=None,
                  ag=None,
                  weights=None,
-                 wrap=False):
+                 wrap=False,
+                 max_threads=1,
+                 parallelizable=True):
+        super().__init__(max_threads=max_threads,
+                         parallelizable=parallelizable)
+
         self.angle = angle
         self.direction = direction
         self.point = point
@@ -162,7 +177,7 @@ def __init__(self,
         else:
             raise ValueError('A point or an AtomGroup must be specified')
 
-    def __call__(self, ts):
+    def _transform(self, ts):
         if self.point is None:
             position = self.center_method()
         else:
diff --git a/package/MDAnalysis/transformations/translate.py b/package/MDAnalysis/transformations/translate.py
index 1b1be2d92ed..a69d7757146 100644
--- a/package/MDAnalysis/transformations/translate.py
+++ b/package/MDAnalysis/transformations/translate.py
@@ -39,14 +39,18 @@
 import numpy as np
 from functools import partial
 
+from .base import TransformationBase
 
-class translate(object):
+
+class translate(TransformationBase):
     """
     Translates the coordinates of a given :class:`~MDAnalysis.coordinates.base.Timestep`
     instance by a given vector.
 
     Example
     -------
+    .. code-block:: python
+
         ts = MDAnalysis.transformations.translate([1,2,3])(ts)
 
     Parameters
@@ -58,8 +62,19 @@ class translate(object):
     -------
     :class:`~MDAnalysis.coordinates.base.Timestep` object
 
+
+    .. versionchanged:: 2.0.0
+       The transformation was changed from a function/closure to a class
+       with ``__call__``.
+    .. versionchanged:: 2.0.0
+       The transformation was changed to inherit from the base class for
+       limiting threads and checking if it can be used in parallel analysis.
     """
-    def __init__(self, vector):
+    def __init__(self, vector,
+                 max_threads=None, parallelizable=True):
+        super().__init__(max_threads=max_threads,
+                         parallelizable=parallelizable)
+
         self.vector = vector
 
         if len(self.vector) > 2:
@@ -67,12 +82,12 @@ def __init__(self, vector):
         else:
             raise ValueError("{} vector is too short".format(self.vector))
 
-    def __call__(self, ts):
+    def _transform(self, ts):
         ts.positions += self.vector
         return ts
 
 
-class center_in_box(object):
+class center_in_box(TransformationBase):
     """
     Translates the coordinates of a given :class:`~MDAnalysis.coordinates.base.Timestep`
     instance so that the center of geometry/mass of the given :class:`~MDAnalysis.core.groups.AtomGroup`
@@ -111,8 +126,15 @@ class center_in_box(object):
     .. versionchanged:: 2.0.0
         The transformation was changed from a function/closure to a class
         with ``__call__``.
+    .. versionchanged:: 2.0.0
+       The transformation was changed to inherit from the base class for
+       limiting threads and checking if it can be used in parallel analysis.
     """
-    def __init__(self, ag, center='geometry', point=None, wrap=False):
+    def __init__(self, ag, center='geometry', point=None, wrap=False,
+                 max_threads=None, parallelizable=True):
+        super().__init__(max_threads=max_threads,
+                         parallelizable=parallelizable)
+
         self.ag = ag
         self.center = center
         self.point = point
@@ -140,7 +162,7 @@ def __init__(self, ag, center='geometry', point=None, wrap=False):
                 raise ValueError(f'{self.ag} is not an AtomGroup object') \
                                  from None
 
-    def __call__(self, ts):
+    def _transform(self, ts):
         if self.point is None:
             boxcenter = np.sum(ts.triclinic_dimensions, axis=0) / 2
         else:
diff --git a/package/MDAnalysis/transformations/wrap.py b/package/MDAnalysis/transformations/wrap.py
index 10e565fc072..1b3246b639e 100644
--- a/package/MDAnalysis/transformations/wrap.py
+++ b/package/MDAnalysis/transformations/wrap.py
@@ -36,8 +36,10 @@
 
 from ..lib._cutil import make_whole
 
+from .base import TransformationBase
 
-class wrap(object):
+
+class wrap(TransformationBase):
     """
     Shift the contents of a given AtomGroup back into the unit cell. ::
     
@@ -84,17 +86,24 @@ class wrap(object):
     .. versionchanged:: 2.0.0
         The transformation was changed from a function/closure to a class
         with ``__call__``.
+    .. versionchanged:: 2.0.0
+       The transformation was changed to inherit from the base class for
+       limiting threads and checking if it can be used in parallel analysis.
     """
-    def __init__(self, ag, compound='atoms'):
+    def __init__(self, ag, compound='atoms',
+                 max_threads=None, parallelizable=True):
+        super().__init__(max_threads=max_threads,
+                         parallelizable=parallelizable)
+
         self.ag = ag
         self.compound = compound
 
-    def __call__(self, ts):
+    def _transform(self, ts):
         self.ag.wrap(compound=self.compound)
         return ts
 
 
-class unwrap(object):
+class unwrap(TransformationBase):
     """
     Move all atoms in an AtomGroup so that bonds don't split over images
 
@@ -138,8 +147,14 @@ class unwrap(object):
     .. versionchanged:: 2.0.0
         The transformation was changed from a function/closure to a class
         with ``__call__``.
+    .. versionchanged:: 2.0.0
+       The transformation was changed to inherit from the base class for
+       limiting threads and checking if it can be used in parallel analysis.
     """
-    def __init__(self, ag):
+    def __init__(self, ag, max_threads=None, parallelizable=True):
+        super().__init__(max_threads=max_threads,
+                         parallelizable=parallelizable)
+
         self.ag = ag
 
         try:
@@ -147,7 +162,7 @@ def __init__(self, ag):
         except AttributeError:
             raise AttributeError("{} has no fragments".format(self.ag))
 
-    def __call__(self, ts):
+    def _transform(self, ts):
         for frag in self.ag.fragments:
             make_whole(frag)
         return ts
diff --git a/package/doc/sphinx/source/documentation_pages/trajectory_transformations.rst b/package/doc/sphinx/source/documentation_pages/trajectory_transformations.rst
index 4d885cb0c45..aafc31976f8 100644
--- a/package/doc/sphinx/source/documentation_pages/trajectory_transformations.rst
+++ b/package/doc/sphinx/source/documentation_pages/trajectory_transformations.rst
@@ -90,7 +90,7 @@ being added.
 Creating transformations
 ------------------------
 
-A simple *transformation* can also be  a function that takes a
+A simple *transformation* can also be a function that takes a
 :class:`~MDAnalysis.coordinates.base.Timestep` as input, modifies it, and
 returns it. If it takes no other arguments but a :class:`Timestep`
 can be defined as the following example:
@@ -113,17 +113,23 @@ the following two methods can be used to create such transformation:
 Creating complex transformation classes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
 
-It is implemented by defining :func:`__call__` for the transformation class
-and can be applied directly to a :class:`Timestep`.
+It is implemented by inheriting from
+:class:`MDAnalysis.transformations.base.TransformationBase`,
+which defines :func:`__call__` for the transformation class
+and can be applied directly to a :class:`Timestep`. :func:`_transform` has to
+be defined and include the operations on the :class:`MDAnalysis.coordinates.base.Timestep`.
+
 So, a transformation class can be roughly defined as follows:
 
 .. code-block:: python
 
-    class up_by_x_class(object):
+    from MDAnalysis.transformations import TransformationBase
+
+    class up_by_x_class(TransformationBase):
         def __init__(self, distance):
             self.distance = distance
 
-        def __call__(self, ts):
+        def _transform(self, ts):
             ts.positions = ts.positions + np.array([0, 0, self.distance], dtype=np.float32)
             return ts
 
@@ -185,7 +191,14 @@ any of these transformations, the module must first be imported:
 
    import MDAnalysis.transformations
 
-A workflow can then be added to a trajectory as described above.
+A workflow can then be added to a trajectory as described above. Notably,
+the parameter `max_threads` can be defined when creating a transformation
+instance to limit the maximum threads.
+(See :class:`MDAnalysis.transformations.base.TransformationBase` for more details) 
+Whether a specific transformation can be used along with parallel analysis
+can be assessed by checking its 
+:attr:`~MDAnalysis.transformations.base.TransformationBase.parallelizable`
+attribute.
 
 See :ref:`implemented-transformations` for more on the existing
 transformations in :mod:`MDAnalysis.transformations`.
@@ -221,6 +234,30 @@ Giving a workflow as a keyword argument when defining the universe:
     u = MDAnalysis.Universe(topology, trajectory, transformations=workflow)
     
 
+.. _building-block-transformation:
+
+Building blocks for Transformation Classes
+------------------------------------------
+Transformations normally ultilize the power of NumPy to get better performance
+on array operations. However, when it comes to parallelism, NumPy will sometimes
+oversubscribe the threads, either by hyper threading (when it uses OpenBlas backend),
+or by working with other parallel engines (e.g. Dask).
+
+In MDAnalysis, we use `threadpoolctl <https://github.com/joblib/threadpoolctl>`_
+inside :class:`~MDAnalysis.transformations.base.TransformationBase` to control the maximum threads for transformations.
+
+It is also possible to apply a global thread limit by setting the external environmental
+varibale, e.g. :code:`OMP_NUM_THREADS=1 MKL_NUM_THREADS=1 OPENBLAS_NUM_THREADS=1
+BLIS_NUM_THREADS=1 python script.py`. Read more about parallelism and resource management
+in `scikit-learn documentations <https://scikit-learn.org/dev/computing/parallelism.html>`_.
+
+Users are advised to benchmark code because interaction between different
+libraries can lead to sub-optimal performance with defaults.
+
+.. toctree::
+
+   ./transformations/base
+
 .. _implemented-transformations:
 
 Currently implemented transformations
diff --git a/package/doc/sphinx/source/documentation_pages/transformations/base.rst b/package/doc/sphinx/source/documentation_pages/transformations/base.rst
new file mode 100644
index 00000000000..d5a0377540d
--- /dev/null
+++ b/package/doc/sphinx/source/documentation_pages/transformations/base.rst
@@ -0,0 +1 @@
+.. automodule:: MDAnalysis.transformations.base
diff --git a/package/requirements.txt b/package/requirements.txt
index 684bce83d01..54e993f063d 100644
--- a/package/requirements.txt
+++ b/package/requirements.txt
@@ -20,4 +20,5 @@ seaborn>=0.7.0,<=0.9
 sphinx==1.8.5
 sphinx_rtd_theme
 sphinx_sitemap
+threadpoolctl
 tqdm
diff --git a/package/setup.py b/package/setup.py
index 474b6ff2eda..bdbe9ade8a7 100755
--- a/package/setup.py
+++ b/package/setup.py
@@ -584,6 +584,7 @@ def long_description(readme):
           'scipy>=1.0.0',
           'matplotlib>=1.5.1',
           'tqdm>=4.43.0',
+          'threadpoolctl',
     ]
     if not os.name == 'nt':
         install_requires.append('gsd>=1.4.0')
diff --git a/testsuite/MDAnalysisTests/transformations/test_base.py b/testsuite/MDAnalysisTests/transformations/test_base.py
new file mode 100644
index 00000000000..acf76ee4df8
--- /dev/null
+++ b/testsuite/MDAnalysisTests/transformations/test_base.py
@@ -0,0 +1,104 @@
+
+# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8
+#
+# MDAnalysis --- https://www.mdanalysis.org
+# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
+# (see the file AUTHORS for the full list of names)
+#
+# Released under the GNU Public Licence, v2 or any higher version
+#
+# Please cite your use of MDAnalysis in published work:
+#
+# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
+# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
+# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
+# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
+# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
+#
+# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
+# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
+# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
+#
+import numpy as np
+import pytest
+from numpy.testing import assert_equal
+from threadpoolctl import threadpool_info
+
+import MDAnalysis as mda
+from MDAnalysisTests.datafiles import PSF, DCD
+from MDAnalysis.transformations.base import TransformationBase
+
+
+class DefaultTransformation(TransformationBase):
+    """Default values for max_threads and parallelizable"""
+    def __init__(self):
+        super().__init__()
+
+    def _transform(self, ts):
+        self.runtime_info = threadpool_info()
+        ts.positions = ts.positions + 1
+        return ts
+
+
+class NoTransform_Transformation(TransformationBase):
+    """Default values for max_threads and parallelizable"""
+    def __init__(self):
+        super().__init__()
+
+
+class CustomTransformation(TransformationBase):
+    """Custom value for max_threads and parallelizable"""
+    def __init__(self, max_threads=1, parallelizable=False):
+        super().__init__(max_threads=max_threads,
+                         parallelizable=parallelizable)
+
+    def _transform(self, ts):
+        self.runtime_info = threadpool_info()
+        ts.positions = ts.positions + 1
+        return ts
+
+
+@pytest.fixture(scope='module')
+def u():
+    return mda.Universe(PSF, DCD)
+
+
+def test_default_value():
+    new_trans = DefaultTransformation()
+    assert new_trans.max_threads is None
+    assert new_trans.parallelizable is True
+
+
+def test_no_transform_function(u):
+    new_trans = NoTransform_Transformation()
+    with pytest.raises(NotImplementedError, match=r"Only implemented"):
+        _ = new_trans._transform(u.trajectory.ts)
+
+
+def test_custom_value():
+    new_trans = CustomTransformation()
+    assert new_trans.max_threads == 1
+    assert new_trans.parallelizable is False
+
+
+def test_setting_thread_limit_value():
+    new_trans = CustomTransformation(max_threads=4)
+    assert new_trans.max_threads == 4
+
+
+def test_thread_limit_apply(u):
+    default_thread_info = threadpool_info()
+    default_num_thread_limit_list = [thread_info['num_threads']
+                                     for thread_info in default_thread_info]
+
+    new_trans = CustomTransformation(max_threads=2)
+    _ = new_trans(u.trajectory.ts)
+    for thread_info in new_trans.runtime_info:
+        assert thread_info['num_threads'] == 2
+
+    #  test the thread limit is only applied locally.
+    new_thread_info = threadpool_info()
+    new_num_thread_limit_list = [thread_info['num_threads']
+                                 for thread_info in new_thread_info]
+    assert_equal(default_num_thread_limit_list, new_num_thread_limit_list)