MDAnalysis · orbeckst · Nov 2, 2018 · Oct 10, 2018 · Oct 10, 2018 · Oct 25, 2018
diff --git a/CHANGELOG b/CHANGELOG
@@ -22,6 +22,9 @@ Enhancements
   * add parallel particle-particle RDF calculation module pmda.rdf (Issue #41)
   * add readonly_attributes context manager to ParallelAnalysisBase
   * add parallel implementation of Leaflet Finder (Issue #47)
+  * add parallel site-specific RDF calculation module pmda.rdf.InterRDF_s
+    (Issue #60)
+
 
 Fixes
   * always distribute frames over blocks so that no empty blocks are

diff --git a/pmda/rdf.py b/pmda/rdf.py
@@ -172,3 +172,183 @@ def _reduce(res, result_single_frame):
             # Add two numpy arrays
             res += result_single_frame
         return res
+
+
+class InterRDF_s(ParallelAnalysisBase):
+    """Site-specific intermolecular pair distribution function
+
+    Arguments
+    ---------
+    u : Universe
+          a Universe that contains atoms in `ags`
+    ags : list
+          a list of pairs of :class:`~MDAnalysis.core.groups.AtomGroup`
+          instances
+    nbins : int (optional)
+          Number of bins in the histogram [75]
+    range : tuple or list (optional)
+          The size of the RDF [0.0, 15.0]
+
+    Example
+    -------
+
+    First create the :class:`InterRDF_s` object, by supplying one Universe and
+    one list of pairs of AtomGroups, then use the :meth:`~InterRDF_s.run`
+    method::
+
+      from MDAnalysisTests.datafiles import GRO_MEMPROT, XTC_MEMPROT
+      u = mda.Universe(GRO_MEMPROT, XTC_MEMPROT)
+
+      s1 = u.select_atoms('name ZND and resid 289')
+      s2 = u.select_atoms('(name OD1 or name OD2) and resid 51 and sphzone 5.0
+                         (resid 289)')
+      s3 = u.select_atoms('name ZND and (resid 291 or resid 292)')
+      s4 = u.select_atoms('(name OD1 or name OD2) and sphzone 5.0 (resid 291)')
+      ags = [[s1, s2], [s3, s4]]
+
+      rdf = InterRDF_s(u, ags)
+      rdf.run()
+
+    Results are available through the :attr:`bins` and :attr:`rdf` attributes::
+
+      plt.plot(rdf.bins, rdf.rdf[0][0][0])
+
+    (Which plots the rdf between the first atom in ``s1`` and the first atom in
+    ``s2``)
+
+    To generate the *cumulative distribution function* (cdf), use the
+    :meth:`~InterRDF_s.get_cdf` method ::
+
+      cdf = rdf.get_cdf()
+
+    Results are available through the :attr:'cdf' attribute::
+
+      plt.plot(rdf.bins, rdf.cdf[0][0][0])
+
+    (Which plots the cdf between the first atom in ``s1`` and the first atom in
+    ``s2``)
+
+    See Also
+    --------
+    MDAnalysis.analysis.rdf.InterRDF_s
+
+
+    .. versionadded:: 0.2.0
+    """
+
+    # pylint: disable=redefined-builtin
+    # continue to use 'range' as long as MDAnalysis uses it so that
+    # the user interface remains consistent
+    def __init__(self, u, ags,
+                 nbins=75, range=(0.0, 15.0), density=True):
+        atomgroups = []
+        for pair in ags:
+            atomgroups.append(pair[0])
+            atomgroups.append(pair[1])
+        super(InterRDF_s, self).__init__(u, atomgroups)
+
+        # List of pairs of AtomGroups
+        self._density = density
+        self.n = len(ags)
+        self.nf = u.trajectory.n_frames
+        self.rdf_settings = {'bins': nbins,
+                             'range': range}
+        ag_shape = []
+        indices = []
+        for (ag1, ag2) in ags:
+            indices.append([ag1.indices, ag2.indices])
+            ag_shape.append([len(ag1), len(ag2)])
+        self.indices = indices
+        self.ag_shape = ag_shape
+
+    # pylint: enable=redefined-builtin
+
+    def _prepare(self):
+        # Empty list to store the RDF
+        count, edges = np.histogram([-1], **self.rdf_settings)
+        self.len = len(count)
+        self.edges = edges
+        self.bins = 0.5 * (edges[:-1] + edges[1:])
+
+        # Need to know average volume
+        self.volume = 0.0
+        self._maxrange = self.rdf_settings['range'][1]
+
+    def _single_frame(self, ts, atomgroups):
+        ags = [[atomgroups[2*i], atomgroups[2*i+1]] for i in range(self.n)]
+        count = [np.zeros((ag1.n_atoms, ag2.n_atoms, self.len),
+                 dtype=np.float64) for ag1, ag2 in ags]
+        for i, (ag1, ag2) in enumerate(ags):
+            u = ag1.universe
+            pairs, dist = distances.capped_distance(ag1.positions,
+                                                    ag2.positions,
+                                                    self._maxrange,
+                                                    box=u.dimensions)
+
+            for j, (idx1, idx2) in enumerate(pairs):
+                count[i][idx1, idx2, :] = np.histogram(dist[j],
+                                                       **self.rdf_settings)[0]
+
+        volume = u.trajectory.ts.volume
+
+        return np.array([np.array(count), np.array(volume, dtype=np.float64)])
+
+    def _conclude(self):
+        self.count = np.sum(self._results[:, 0])
+        self.volume = np.sum(self._results[:, 1])
+        # Volume in each radial shell
+        vol = np.power(self.edges[1:], 3) - np.power(self.edges[:-1], 3)
+        vol *= 4/3.0 * np.pi
+
+        # Empty lists to restore indices, RDF
+        rdf = []
+
+        for i, (nA, nB) in enumerate(self.ag_shape):
+            # Number of each selection
+            N = nA * nB
+
+            # Average number density
+            box_vol = self.volume / self.nf
+            density = N / box_vol
+
+            if self._density:
+                rdf.append(self.count[i] / (density * vol * self.nf))
+            else:
+                rdf.append(self.count[i] / (vol * self.nf))
+
+        self.rdf = rdf
+
+    def get_cdf(self):
+        """Calculate the cumulative distribution functions (CDF) for all sites.
+        Note that this is the actual count within a given radius, i.e.,
+        :math:`N(r)`.
+
+        Returns
+        -------
+              cdf : list
+                      list of arrays with the same structure as :attr:`rdf`
+        """
+        # Calculate cumulative distribution function
+        # Empty list to restore CDF
+        cdf = []
+
+        for count in self.count:
+            cdf.append(np.cumsum(count, axis=2) / self.nf)
+
+        # Results stored in self.cdf
+        # self.cdf is a list of cdf between pairs of AtomGroups in ags
+        self.cdf = cdf
+
+        return cdf
+
+    @staticmethod
+    def _reduce(res, result_single_frame):
+        """ 'add' action for an accumulator"""
+        if isinstance(res, list) and len(res) == 0:
+            # Convert res from an empty list to a numpy array
+            # which has the same shape as the single frame result
+            res = result_single_frame
+        else:
+            # Add two numpy arrays
+            res += result_single_frame
+        return res
diff --git a/pmda/test/test_rdf.py b/pmda/test/test_rdf.py
@@ -1,24 +1,11 @@
 # -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
-# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
 #
-# MDAnalysis --- https://www.mdanalysis.org
-# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors
+# PMDA
+# Copyright (c) 2017 The MDAnalysis Development Team and contributors
 # (see the file AUTHORS for the full list of names)
 #
 # Released under the GNU Public Licence, v2 or any higher version
-#
-# Please cite your use of MDAnalysis in published work:
-#
-# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler,
-# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein.
-# MDAnalysis: A Python package for the rapid analysis of molecular dynamics
-# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th
-# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy.
-#
-# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
-# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
-# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
-#
 from __future__ import absolute_import
 
 import pytest
@@ -63,7 +50,7 @@ def test_range(u):
     assert rdf.edges[-1] == rmax
 
 
-def test_count_sum(sels):
+def test_count_sum(sels, scheduler):
     # OD1 vs OD2
     # should see 577 comparisons in count
     s1, s2 = sels
@@ -86,11 +73,12 @@ def test_double_run(sels):
     assert len(rdf.count[rdf.count == 3]) == 7
 
 
-def test_same_result(sels):
+@pytest.mark.parametrize("n_blocks", [1, 2, 3, 4])
+def test_same_result(sels, n_blocks):
     # should see same results from analysis.rdf and pmda.rdf
     s1, s2 = sels
     nrdf = rdf.InterRDF(s1, s2).run()
-    prdf = InterRDF(s1, s2).run()
+    prdf = InterRDF(s1, s2).run(n_blocks=n_blocks)
     assert_almost_equal(nrdf.count, prdf.count)
     assert_almost_equal(nrdf.rdf, prdf.rdf)
 

diff --git a/pmda/test/test_rdf_s.py b/pmda/test/test_rdf_s.py
@@ -0,0 +1,119 @@
+# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
+# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
+#
+# PMDA
+# Copyright (c) 2017 The MDAnalysis Development Team and contributors
+# (see the file AUTHORS for the full list of names)
+#
+# Released under the GNU Public Licence, v2 or any higher version
+from __future__ import absolute_import, division
+
+import pytest
+
+from numpy.testing import assert_almost_equal
+
+import MDAnalysis as mda
+import numpy as np
+from pmda.rdf import InterRDF_s
+from MDAnalysis.analysis import rdf
+
+from MDAnalysisTests.datafiles import GRO_MEMPROT, XTC_MEMPROT
+
+
+@pytest.fixture(scope='module')
+def u():
+    return mda.Universe(GRO_MEMPROT, XTC_MEMPROT)
+
+
+@pytest.fixture(scope='module')
+def sels(u):
+    s1 = u.select_atoms('name ZND and resid 289')
+    s2 = u.select_atoms(
+         '(name OD1 or name OD2) and resid 51 and sphzone 5.0 (resid 289)')
+    s3 = u.select_atoms('name ZND and (resid 291 or resid 292)')
+    s4 = u.select_atoms('(name OD1 or name OD2) and sphzone 5.0 (resid 291)')
+    ags = [[s1, s2], [s3, s4]]
+    return ags
+
+
+@pytest.fixture(scope='module')
+def rdf_s(u, sels, scheduler):
+    return InterRDF_s(u, sels).run()
+
+
+def test_nbins(u):
+    ags = sels(u)
+    rdf = InterRDF_s(u, ags, nbins=412).run()
+
+    assert len(rdf.bins) == 412
+
+
+def test_range(u):
+    ags = sels(u)
+    rmin, rmax = 1.0, 13.0
+    rdf = InterRDF_s(u, ags, range=(rmin, rmax)).run()
+
+    assert rdf.edges[0] == rmin
+    assert rdf.edges[-1] == rmax
+
+
+def test_count_size(rdf_s):
+    # ZND vs OD1 & OD2
+    # should see 2 elements in rdf.count
+    # 1 element in rdf.count[0]
+    # 2 elements in rdf.count[0][0]
+    # 2 elements in rdf.count[1]
+    # 2 elements in rdf.count[1][0]
+    # 2 elements in rdf.count[1][1]
+    assert len(rdf_s.count) == 2
+    assert len(rdf_s.count[0]) == 1
+    assert len(rdf_s.count[0][0]) == 2
+    assert len(rdf_s.count[1]) == 2
+    assert len(rdf_s.count[1][0]) == 2
+    assert len(rdf_s.count[1][1]) == 2
+
+
+def test_count(rdf_s):
+    # should see one distance with 5 counts in count[0][0][1]
+    # should see one distance with 3 counts in count[1][1][0]
+    assert len(rdf_s.count[0][0][1][rdf_s.count[0][0][1] == 5]) == 1
+    assert len(rdf_s.count[1][1][0][rdf_s.count[1][1][0] == 3]) == 1
+
+
+def test_double_run(rdf_s):
+    # running rdf twice should give the same result
+    assert len(rdf_s.count[0][0][1][rdf_s.count[0][0][1] == 5]) == 1
+    assert len(rdf_s.count[1][1][0][rdf_s.count[1][1][0] == 3]) == 1
+
+
+def test_cdf(rdf_s):
+    rdf_s.get_cdf()
+    assert rdf_s.cdf[0][0][0][-1] == rdf_s.count[0][0][0].sum()/rdf_s.nf
+
+
+def test_reduce(rdf_s):
+    # should see numpy.array addtion
+    res = []
+    single_frame = np.array([np.array([1, 2]), np.array([3])])
+    res = rdf_s._reduce(res, single_frame)
+    res = rdf_s._reduce(res, single_frame)
+    assert_almost_equal(res[0], np.array([2, 4]))
+    assert_almost_equal(res[1], np.array([6]))
+
+
+@pytest.mark.parametrize("n_blocks", [1, 2, 3, 4])
+def test_same_result(u, sels, n_blocks):
+    # should see same results from analysis.rdf.InterRDF_s
+    # and pmda.rdf.InterRDF_s
+    nrdf = rdf.InterRDF_s(u, sels).run()
+    prdf = InterRDF_s(u, sels).run(n_blocks=n_blocks)
+    assert_almost_equal(nrdf.count[0][0][0], prdf.count[0][0][0])
+    assert_almost_equal(nrdf.rdf[0][0][0], prdf.rdf[0][0][0])
+
+
+@pytest.mark.parametrize("density, value", [
+    (True, 13275.775528444701),
+    (False, 0.021915460340071267)])
+def test_density(u, sels, density, value):
+    rdf = InterRDF_s(u, sels, density=density).run()
+    assert_almost_equal(max(rdf.rdf[0][0][0]), value)