diff --git a/CHANGELOG b/CHANGELOG index f473d050..676ecc41 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -22,6 +22,9 @@ Enhancements * add parallel particle-particle RDF calculation module pmda.rdf (Issue #41) * add readonly_attributes context manager to ParallelAnalysisBase * add parallel implementation of Leaflet Finder (Issue #47) + * add parallel site-specific RDF calculation module pmda.rdf.InterRDF_s + (Issue #60) + Fixes * always distribute frames over blocks so that no empty blocks are diff --git a/pmda/rdf.py b/pmda/rdf.py index 40aebc39..c6f85bd4 100644 --- a/pmda/rdf.py +++ b/pmda/rdf.py @@ -172,3 +172,183 @@ def _reduce(res, result_single_frame): # Add two numpy arrays res += result_single_frame return res + + +class InterRDF_s(ParallelAnalysisBase): + """Site-specific intermolecular pair distribution function + + Arguments + --------- + u : Universe + a Universe that contains atoms in `ags` + ags : list + a list of pairs of :class:`~MDAnalysis.core.groups.AtomGroup` + instances + nbins : int (optional) + Number of bins in the histogram [75] + range : tuple or list (optional) + The size of the RDF [0.0, 15.0] + + Example + ------- + + First create the :class:`InterRDF_s` object, by supplying one Universe and + one list of pairs of AtomGroups, then use the :meth:`~InterRDF_s.run` + method:: + + from MDAnalysisTests.datafiles import GRO_MEMPROT, XTC_MEMPROT + u = mda.Universe(GRO_MEMPROT, XTC_MEMPROT) + + s1 = u.select_atoms('name ZND and resid 289') + s2 = u.select_atoms('(name OD1 or name OD2) and resid 51 and sphzone 5.0 + (resid 289)') + s3 = u.select_atoms('name ZND and (resid 291 or resid 292)') + s4 = u.select_atoms('(name OD1 or name OD2) and sphzone 5.0 (resid 291)') + ags = [[s1, s2], [s3, s4]] + + rdf = InterRDF_s(u, ags) + rdf.run() + + Results are available through the :attr:`bins` and :attr:`rdf` attributes:: + + plt.plot(rdf.bins, rdf.rdf[0][0][0]) + + (Which plots the rdf between the first atom in ``s1`` and the first atom in + ``s2``) + + To generate the *cumulative distribution function* (cdf), use the + :meth:`~InterRDF_s.get_cdf` method :: + + cdf = rdf.get_cdf() + + Results are available through the :attr:'cdf' attribute:: + + plt.plot(rdf.bins, rdf.cdf[0][0][0]) + + (Which plots the cdf between the first atom in ``s1`` and the first atom in + ``s2``) + + See Also + -------- + MDAnalysis.analysis.rdf.InterRDF_s + + + .. versionadded:: 0.2.0 + """ + + # pylint: disable=redefined-builtin + # continue to use 'range' as long as MDAnalysis uses it so that + # the user interface remains consistent + def __init__(self, u, ags, + nbins=75, range=(0.0, 15.0), density=True): + atomgroups = [] + for pair in ags: + atomgroups.append(pair[0]) + atomgroups.append(pair[1]) + super(InterRDF_s, self).__init__(u, atomgroups) + + # List of pairs of AtomGroups + self._density = density + self.n = len(ags) + self.nf = u.trajectory.n_frames + self.rdf_settings = {'bins': nbins, + 'range': range} + ag_shape = [] + indices = [] + for (ag1, ag2) in ags: + indices.append([ag1.indices, ag2.indices]) + ag_shape.append([len(ag1), len(ag2)]) + self.indices = indices + self.ag_shape = ag_shape + + # pylint: enable=redefined-builtin + + def _prepare(self): + # Empty list to store the RDF + count, edges = np.histogram([-1], **self.rdf_settings) + self.len = len(count) + self.edges = edges + self.bins = 0.5 * (edges[:-1] + edges[1:]) + + # Need to know average volume + self.volume = 0.0 + self._maxrange = self.rdf_settings['range'][1] + + def _single_frame(self, ts, atomgroups): + ags = [[atomgroups[2*i], atomgroups[2*i+1]] for i in range(self.n)] + count = [np.zeros((ag1.n_atoms, ag2.n_atoms, self.len), + dtype=np.float64) for ag1, ag2 in ags] + for i, (ag1, ag2) in enumerate(ags): + u = ag1.universe + pairs, dist = distances.capped_distance(ag1.positions, + ag2.positions, + self._maxrange, + box=u.dimensions) + + for j, (idx1, idx2) in enumerate(pairs): + count[i][idx1, idx2, :] = np.histogram(dist[j], + **self.rdf_settings)[0] + + volume = u.trajectory.ts.volume + + return np.array([np.array(count), np.array(volume, dtype=np.float64)]) + + def _conclude(self): + self.count = np.sum(self._results[:, 0]) + self.volume = np.sum(self._results[:, 1]) + # Volume in each radial shell + vol = np.power(self.edges[1:], 3) - np.power(self.edges[:-1], 3) + vol *= 4/3.0 * np.pi + + # Empty lists to restore indices, RDF + rdf = [] + + for i, (nA, nB) in enumerate(self.ag_shape): + # Number of each selection + N = nA * nB + + # Average number density + box_vol = self.volume / self.nf + density = N / box_vol + + if self._density: + rdf.append(self.count[i] / (density * vol * self.nf)) + else: + rdf.append(self.count[i] / (vol * self.nf)) + + self.rdf = rdf + + def get_cdf(self): + """Calculate the cumulative distribution functions (CDF) for all sites. + Note that this is the actual count within a given radius, i.e., + :math:`N(r)`. + + Returns + ------- + cdf : list + list of arrays with the same structure as :attr:`rdf` + """ + # Calculate cumulative distribution function + # Empty list to restore CDF + cdf = [] + + for count in self.count: + cdf.append(np.cumsum(count, axis=2) / self.nf) + + # Results stored in self.cdf + # self.cdf is a list of cdf between pairs of AtomGroups in ags + self.cdf = cdf + + return cdf + + @staticmethod + def _reduce(res, result_single_frame): + """ 'add' action for an accumulator""" + if isinstance(res, list) and len(res) == 0: + # Convert res from an empty list to a numpy array + # which has the same shape as the single frame result + res = result_single_frame + else: + # Add two numpy arrays + res += result_single_frame + return res diff --git a/pmda/test/test_rdf.py b/pmda/test/test_rdf.py index 2dfec024..7d33f58e 100644 --- a/pmda/test/test_rdf.py +++ b/pmda/test/test_rdf.py @@ -1,24 +1,11 @@ # -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8 +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 # -# MDAnalysis --- https://www.mdanalysis.org -# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# PMDA +# Copyright (c) 2017 The MDAnalysis Development Team and contributors # (see the file AUTHORS for the full list of names) # # Released under the GNU Public Licence, v2 or any higher version -# -# Please cite your use of MDAnalysis in published work: -# -# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, -# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. -# MDAnalysis: A Python package for the rapid analysis of molecular dynamics -# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th -# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. -# -# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. -# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. -# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 -# from __future__ import absolute_import import pytest @@ -63,7 +50,7 @@ def test_range(u): assert rdf.edges[-1] == rmax -def test_count_sum(sels): +def test_count_sum(sels, scheduler): # OD1 vs OD2 # should see 577 comparisons in count s1, s2 = sels @@ -86,11 +73,12 @@ def test_double_run(sels): assert len(rdf.count[rdf.count == 3]) == 7 -def test_same_result(sels): +@pytest.mark.parametrize("n_blocks", [1, 2, 3, 4]) +def test_same_result(sels, n_blocks): # should see same results from analysis.rdf and pmda.rdf s1, s2 = sels nrdf = rdf.InterRDF(s1, s2).run() - prdf = InterRDF(s1, s2).run() + prdf = InterRDF(s1, s2).run(n_blocks=n_blocks) assert_almost_equal(nrdf.count, prdf.count) assert_almost_equal(nrdf.rdf, prdf.rdf) diff --git a/pmda/test/test_rdf_s.py b/pmda/test/test_rdf_s.py new file mode 100644 index 00000000..08e2d8f9 --- /dev/null +++ b/pmda/test/test_rdf_s.py @@ -0,0 +1,119 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# PMDA +# Copyright (c) 2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +from __future__ import absolute_import, division + +import pytest + +from numpy.testing import assert_almost_equal + +import MDAnalysis as mda +import numpy as np +from pmda.rdf import InterRDF_s +from MDAnalysis.analysis import rdf + +from MDAnalysisTests.datafiles import GRO_MEMPROT, XTC_MEMPROT + + +@pytest.fixture(scope='module') +def u(): + return mda.Universe(GRO_MEMPROT, XTC_MEMPROT) + + +@pytest.fixture(scope='module') +def sels(u): + s1 = u.select_atoms('name ZND and resid 289') + s2 = u.select_atoms( + '(name OD1 or name OD2) and resid 51 and sphzone 5.0 (resid 289)') + s3 = u.select_atoms('name ZND and (resid 291 or resid 292)') + s4 = u.select_atoms('(name OD1 or name OD2) and sphzone 5.0 (resid 291)') + ags = [[s1, s2], [s3, s4]] + return ags + + +@pytest.fixture(scope='module') +def rdf_s(u, sels, scheduler): + return InterRDF_s(u, sels).run() + + +def test_nbins(u): + ags = sels(u) + rdf = InterRDF_s(u, ags, nbins=412).run() + + assert len(rdf.bins) == 412 + + +def test_range(u): + ags = sels(u) + rmin, rmax = 1.0, 13.0 + rdf = InterRDF_s(u, ags, range=(rmin, rmax)).run() + + assert rdf.edges[0] == rmin + assert rdf.edges[-1] == rmax + + +def test_count_size(rdf_s): + # ZND vs OD1 & OD2 + # should see 2 elements in rdf.count + # 1 element in rdf.count[0] + # 2 elements in rdf.count[0][0] + # 2 elements in rdf.count[1] + # 2 elements in rdf.count[1][0] + # 2 elements in rdf.count[1][1] + assert len(rdf_s.count) == 2 + assert len(rdf_s.count[0]) == 1 + assert len(rdf_s.count[0][0]) == 2 + assert len(rdf_s.count[1]) == 2 + assert len(rdf_s.count[1][0]) == 2 + assert len(rdf_s.count[1][1]) == 2 + + +def test_count(rdf_s): + # should see one distance with 5 counts in count[0][0][1] + # should see one distance with 3 counts in count[1][1][0] + assert len(rdf_s.count[0][0][1][rdf_s.count[0][0][1] == 5]) == 1 + assert len(rdf_s.count[1][1][0][rdf_s.count[1][1][0] == 3]) == 1 + + +def test_double_run(rdf_s): + # running rdf twice should give the same result + assert len(rdf_s.count[0][0][1][rdf_s.count[0][0][1] == 5]) == 1 + assert len(rdf_s.count[1][1][0][rdf_s.count[1][1][0] == 3]) == 1 + + +def test_cdf(rdf_s): + rdf_s.get_cdf() + assert rdf_s.cdf[0][0][0][-1] == rdf_s.count[0][0][0].sum()/rdf_s.nf + + +def test_reduce(rdf_s): + # should see numpy.array addtion + res = [] + single_frame = np.array([np.array([1, 2]), np.array([3])]) + res = rdf_s._reduce(res, single_frame) + res = rdf_s._reduce(res, single_frame) + assert_almost_equal(res[0], np.array([2, 4])) + assert_almost_equal(res[1], np.array([6])) + + +@pytest.mark.parametrize("n_blocks", [1, 2, 3, 4]) +def test_same_result(u, sels, n_blocks): + # should see same results from analysis.rdf.InterRDF_s + # and pmda.rdf.InterRDF_s + nrdf = rdf.InterRDF_s(u, sels).run() + prdf = InterRDF_s(u, sels).run(n_blocks=n_blocks) + assert_almost_equal(nrdf.count[0][0][0], prdf.count[0][0][0]) + assert_almost_equal(nrdf.rdf[0][0][0], prdf.rdf[0][0][0]) + + +@pytest.mark.parametrize("density, value", [ + (True, 13275.775528444701), + (False, 0.021915460340071267)]) +def test_density(u, sels, density, value): + rdf = InterRDF_s(u, sels, density=density).run() + assert_almost_equal(max(rdf.rdf[0][0][0]), value)