diff --git a/package/CHANGELOG b/package/CHANGELOG index f75f30545d5..24f7f8797b5 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -124,6 +124,11 @@ Changes * moved coordinates.base.ChainReader to coordinates.chain.ChainReader * renamed private method ChainReader.get_flname() to ChainReader._get_filename() * totaltime now considers the first frame to be at time 0 (Issue #1137) + * analysis.rms.RMSF now conforms to standard analysis API (PR #1136) + * analysis.rms/align function keyword `mass_weighted` is replaced + by generic `weights='mass'`. This new keyword allows to pass it an + arbitrary weights array as well. (PR 1136) + Deprecations (Issue #599) * Use of rms_fit_trj deprecated in favor of AlignTraj class (Issue #845) diff --git a/package/MDAnalysis/analysis/align.py b/package/MDAnalysis/analysis/align.py index dd3327ea868..8f890cb91d8 100644 --- a/package/MDAnalysis/analysis/align.py +++ b/package/MDAnalysis/analysis/align.py @@ -311,7 +311,7 @@ def _fit_to(mobile_coordinates, ref_coordinates, mobile_atoms, return mobile_atoms, min_rmsd -def alignto(mobile, reference, select="all", mass_weighted=False, +def alignto(mobile, reference, select="all", mass_weighted=None, weights=None, subselection=None, tol_mass=0.1, strict=False): """Spatially align *mobile* to *reference* by doing a RMSD fit on *select* atoms. @@ -343,57 +343,59 @@ def alignto(mobile, reference, select="all", mass_weighted=False, Parameters ---------- - mobile : Universe or AtomGroup - structure to be aligned, a - :class:`~MDAnalysis.core.groups.AtomGroup` or a whole - :class:`~MDAnalysis.core.universe.Universe` - reference : Universe or AtomGroup - reference structure, a :class:`~MDAnalysis.core.groups.AtomGroup` - or a whole :class:`~MDAnalysis.core.universe.Universe` - select: string or dict, optional - 1. any valid selection string for - :meth:`~MDAnalysis.core.groups.AtomGroup.select_atoms` that - produces identical selections in *mobile* and *reference*; or - 2. dictionary ``{'mobile':sel1, 'reference':sel2}``. - (the :func:`fasta2select` function returns such a - dictionary based on a ClustalW_ or STAMP_ sequence alignment); or - 3. tuple ``(sel1, sel2)`` - - When using 2. or 3. with *sel1* and *sel2* then these selections can - also each be a list of selection strings (to generate a AtomGroup with - defined atom order as described under :ref:`ordered-selections-label`). - mass_weighted : boolean, optional - ``True`` uses the masses :meth:`reference.masses` as weights for the - RMSD fit. - tol_mass: float, optional - Reject match if the atomic masses for matched atoms differ by more than - *tol_mass*, default [0.1] - strict: boolean, optional - ``True`` - Will raise :exc:`SelectionError` if a single atom does not - match between the two selections. - ``False`` [default] - Will try to prepare a matching selection by dropping - residues with non-matching atoms. See :func:`get_matching_atoms` - for details. - subselection : string, optional - Apply the transformation only to this selection. - - ``None`` [default] - Apply to `mobile.universe.atoms` (i.e. all atoms in the - context of the selection from *mobile* such as the rest of a - protein, ligands and the surrounding water) - *selection-string* - Apply to `mobile.select_atoms(selection-string)` - :class:`~MDAnalysis.core.groups.AtomGroup` - Apply to the arbitrary group of atoms + mobile : Universe or AtomGroup + structure to be aligned, a + :class:`~MDAnalysis.core.groups.AtomGroup` or a whole + :class:`~MDAnalysis.core.universe.Universe` + reference : Universe or AtomGroup + reference structure, a :class:`~MDAnalysis.core.groups.AtomGroup` + or a whole :class:`~MDAnalysis.core.universe.Universe` + select: string or dict, optional + 1. any valid selection string for + :meth:`~MDAnalysis.core.groups.AtomGroup.select_atoms` that + produces identical selections in *mobile* and *reference*; or + 2. dictionary ``{'mobile':sel1, 'reference':sel2}``. + (the :func:`fasta2select` function returns such a + dictionary based on a ClustalW_ or STAMP_ sequence alignment); or + 3. tuple ``(sel1, sel2)`` + + When using 2. or 3. with *sel1* and *sel2* then these selections can + also each be a list of selection strings (to generate a AtomGroup with + defined atom order as described under :ref:`ordered-selections-label`). + mass_weighted : boolean, optional (deprecated) + ``True`` uses the masses :meth:`reference.masses` as weights for the + RMSD fit. + weights : str/array_like, optional + weights to be used for fit. Can be either 'mass' or an array_like + tol_mass: float, optional + Reject match if the atomic masses for matched atoms differ by more than + *tol_mass*, default [0.1] + strict: boolean, optional + ``True`` + Will raise :exc:`SelectionError` if a single atom does not + match between the two selections. + ``False`` [default] + Will try to prepare a matching selection by dropping + residues with non-matching atoms. See :func:`get_matching_atoms` + for details. + subselection : string, optional + Apply the transformation only to this selection. + + ``None`` [default] + Apply to `mobile.universe.atoms` (i.e. all atoms in the + context of the selection from *mobile* such as the rest of a + protein, ligands and the surrounding water) + *selection-string* + Apply to `mobile.select_atoms(selection-string)` + :class:`~MDAnalysis.core.groups.AtomGroup` + Apply to the arbitrary group of atoms Returns ------- old_rmsd RMSD before spatial alignment new_rmsd - RMSD after spatial alignment. + RMSD after spatial alignment See Also -------- @@ -408,6 +410,9 @@ def alignto(mobile, reference, select="all", mass_weighted=False, Uses :func:`get_matching_atoms` to work with incomplete selections and new *strict* keyword. The new default is to be lenient whereas the old behavior was the equivalent of *strict* = ``True``. + + .. versionchanged:: 0.16.0 + new general 'weights' kwarg replace mass_weights, deprecated 'mass_weights' """ if select in ('all', None): # keep the EXACT order in the input AtomGroups; select_atoms('all') @@ -424,16 +429,18 @@ def alignto(mobile, reference, select="all", mass_weighted=False, tol_mass=tol_mass, strict=strict) - if mass_weighted: - # division by the mean is done in rmsd, not done in qcp, but is done in - # _fit_to + if mass_weighted is not None: + warnings.warn("mass weighted is deprecated argument. Please use " + " 'weights=\"mass\" instead. Will be removed in 0.17.0", + category=DeprecationWarning) + if mass_weighted: + weights = 'mass' + + if weights == 'mass': weights = ref_atoms.masses - ref_com = ref_atoms.center_of_mass() - mobile_com = mobile_atoms.center_of_mass() - else: - weights = None - ref_com = ref_atoms.center_of_geometry() - mobile_com = mobile_atoms.center_of_geometry() + + mobile_com = mobile_atoms.center(weights) + ref_com = ref_atoms.center(weights) ref_coordinates = ref_atoms.positions - ref_com mobile_coordinates = mobile_atoms.positions - mobile_com @@ -485,8 +492,9 @@ class AlignTraj(AnalysisBase): """ def __init__(self, mobile, reference, select='all', filename=None, - prefix='rmsfit_', mass_weighted=False, tol_mass=0.1, - strict=False, force=True, in_memory=False, **kwargs): + prefix='rmsfit_', mass_weighted=None, weights=None, + tol_mass=0.1, strict=False, force=True, in_memory=False, + **kwargs): """Initialization Parameters @@ -503,9 +511,12 @@ def __init__(self, mobile, reference, select='all', filename=None, prefix : string, optional Provide a string to prepend to filename for results to be written to - mass_weighted : boolean, optional + mass_weighted : boolean, optional (deprecated) Boolean, if true will rmsd will be mass-weighted corresponding to the atoms selected in the reference trajectory + weights : str/array_like (optional) + Can either be 'mass' to use reference masses as weights or an + arbitrary array tol_mass : float, optional Tolerance given to `get_matching_atoms` to find appropriate atoms strict : boolean, optional @@ -581,24 +592,29 @@ def __init__(self, mobile, reference, select='all', filename=None, # retained self._writer = mda.Writer(self.filename, natoms) - if mass_weighted: - # if performing a mass-weighted alignment/rmsd calculation - self._weights = self.ref_atoms.masses - else: - self._weights = None + if mass_weighted is not None: + warnings.warn("mass weighted is deprecated argument. Please use " + " 'weights=\"mass\" instead. Will be removed in 0.17.0", + category=DeprecationWarning) + if mass_weighted: + weights = 'mass' + + if weights == 'mass': + weights = self.ref_atoms.masses + self._weights = weights logger.info("RMS-fitting on {0:d} atoms.".format(len(self.ref_atoms))) def _prepare(self): # reference centre of mass system - self._ref_com = self.ref_atoms.center_of_mass() + self._ref_com = self.ref_atoms.center(self._weights) self._ref_coordinates = self.ref_atoms.positions - self._ref_com # allocate the array for selection atom coords self.rmsd = np.zeros((self.n_frames,)) def _single_frame(self): index = self._ts.frame - mobile_com = self.mobile_atoms.center_of_mass() + mobile_com = self.mobile_atoms.center(self._weights) mobile_coordinates = self.mobile_atoms.positions - mobile_com mobile_atoms, self.rmsd[index] = _fit_to(mobile_coordinates, self._ref_coordinates, diff --git a/package/MDAnalysis/analysis/encore/confdistmatrix.py b/package/MDAnalysis/analysis/encore/confdistmatrix.py index 46adf873944..83cbe850725 100644 --- a/package/MDAnalysis/analysis/encore/confdistmatrix.py +++ b/package/MDAnalysis/analysis/encore/confdistmatrix.py @@ -42,6 +42,7 @@ class to compute an RMSD matrix in such a way is also available. from datetime import datetime from time import sleep import logging +import warnings from ...core.universe import Universe @@ -53,8 +54,8 @@ class to compute an RMSD matrix in such a way is also available. def conformational_distance_matrix(ensemble, conf_dist_function, selection="", - superimposition_selection="", n_jobs=1, pairwise_align=True, - mass_weighted=True, metadata=True, verbose=False): + superimposition_selection="", n_jobs=1, pairwise_align=True, weights='mass', + mass_weighted=None, metadata=True, verbose=False): """ Run the conformational distance matrix calculation. args and kwargs are passed to conf_dist_function. @@ -67,18 +68,26 @@ def conformational_distance_matrix(ensemble, conf_dist_function : function object Function that fills the matrix with conformational distance values. See set_rmsd_matrix_elements for an example. - pairwise_align : bool + selection : str, optional + use this selection + superimposition_selection : str, optional + TODO + pairwise_align : bool, optional Whether to perform pairwise alignment between conformations. Default is True (do the superimposition) - mass_weighted : bool + mass_weighted : bool, optional (deprecated) Whether to perform mass-weighted superimposition and metric calculation. Default is True. - metadata : bool + weights : str/array_like, optional + weights to be used for fit. Can be either 'mass' or an array_like + metadata : bool, optional Whether to build a metadata dataset for the calculated matrix. Default is True. - n_jobs : int + n_jobs : int, optional Number of cores to be used for parallel calculation Default is 1. -1 uses all available cores + verbose : bool, optional + enable verbose output Returns ------- @@ -91,6 +100,13 @@ def conformational_distance_matrix(ensemble, framesn = len(ensemble.trajectory.timeseries( ensemble.select_atoms(selection), format='fac')) + if mass_weighted is not None: + warnings.warn("mass weighted is deprecated argument. Please use " + " 'weights=\"mass\" instead. Will be removed in 0.17.0", + category=DeprecationWarning) + if mass_weighted: + weights = 'mass' + # Prepare metadata recarray if metadata: metadata = np.array([(gethostname(), @@ -128,20 +144,32 @@ def conformational_distance_matrix(ensemble, else: fitting_coordinates = None - # Prepare masses as necessary - if mass_weighted: - masses = ensemble.select_atoms(selection).masses.astype(np.float64) + + if weights == 'mass': + weights = ensemble.select_atoms(selection).masses.astype(np.float64) if pairwise_align: - subset_masses = ensemble.select_atoms(subset_selection).masses.astype(np.float64) + subset_weights = ensemble.select_atoms(subset_selection).masses.astype(np.float64) else: - subset_masses = None - else: - masses = np.ones((ensemble.trajectory.timeseries( + subset_weights = None + elif weights is None: + weights = np.ones((ensemble.trajectory.timeseries( ensemble.select_atoms(selection))[0].shape[0])).astype(np.float64) if pairwise_align: - subset_masses = np.ones((fit_coords[0].shape[0])).astype(np.float64) + subset_weights = np.ones((fit_coords[0].shape[0])).astype(np.float64) else: - subset_masses = None + subset_weights = None + else: + if pairwise_align: + if len(weights) != 2: + raise RuntimeError("used pairwise alignment with custom " + "weights. Please provide 2 tuple with " + "weights for 'selection' and " + "'superimposition_selection'") + subset_weights = weights[1] + weights = weights[0] + else: + subset_weights = None + # Allocate for output matrix matsize = framesn * (framesn + 1) / 2 @@ -155,25 +183,23 @@ def conformational_distance_matrix(ensemble, element, rmsd_coordinates, distmat, - masses, + weights, fitting_coordinates, - subset_masses, - masses) for element in indices) + subset_weights) for element in indices) # When the workers have finished, return a TriangularMatrix object return TriangularMatrix(distmat, metadata=metadata) -def set_rmsd_matrix_elements(tasks, coords, rmsdmat, masses, fit_coords=None, - fit_masses=None, pbar_counter=None, *args, **kwargs): +def set_rmsd_matrix_elements(tasks, coords, rmsdmat, weights, fit_coords=None, + fit_weights=None, *args, **kwargs): ''' RMSD Matrix calculator Parameters ---------- - tasks : iterator of int of length 2 Given a triangular matrix, this function will calculate RMSD values from element tasks[0] to tasks[1]. Since the matrix @@ -182,48 +208,43 @@ def set_rmsd_matrix_elements(tasks, coords, rmsdmat, masses, fit_coords=None, The matrix is written as an array in a row-major order (see the TriangularMatrix class for details). - If fit_coords and fit_masses are specified, the structures - will be superimposed before calculating RMSD, and fit_coords and fit_masses + If fit_coords and fit_weights are specified, the structures + will be superimposed before calculating RMSD, and fit_coords and fit_weights will be used to place both structures at their center of mass and - compute the rotation matrix. In this case, both fit_coords and fit_masses + compute the rotation matrix. In this case, both fit_coords and fit_weights must be specified. - coords : numpy.array Array of the ensemble coordinates - - masses : numpy.array - Array of atomic masses, having the same order as the + weights : numpy.array + Array of atomic weights, having the same order as the coordinates array - rmsdmat : encore.utils.TriangularMatrix Memory-shared triangular matrix object - - fit_coords : numpy.array or None + fit_coords : numpy.array or None, optional Array of the coordinates used for fitting - - fit_masses : numpy.array - Array of atomic masses, having the same order as the + fit_weights : numpy.array. optional + Array of atomic weights, having the same order as the fit_coords array ''' i, j = tasks - if fit_coords is None and fit_masses is None: - summasses = np.sum(masses) + if fit_coords is None and fit_weights is None: + sumweights = np.sum(weights) rmsdmat[(i + 1) * i / 2 + j] = PureRMSD(coords[i].astype(np.float64), coords[j].astype(np.float64), coords[j].shape[0], - masses, - summasses) + weights, + sumweights) - elif fit_coords is not None and fit_masses is not None: - summasses = np.sum(masses) - subset_weights = np.asarray(fit_masses) / np.mean(fit_masses) + elif fit_coords is not None and fit_weights is not None: + sumweights = np.sum(weights) + subset_weights = np.asarray(fit_weights) / np.mean(fit_weights) com_i = np.average(fit_coords[i], axis=0, - weights=fit_masses) + weights=fit_weights) translated_i = coords[i] - com_i subset1_coords = fit_coords[i] - com_i com_j = np.average(fit_coords[j], axis=0, - weights=fit_masses) + weights=fit_weights) translated_j = coords[j] - com_j subset2_coords = fit_coords[j] - com_j rotamat = rotation_matrix(subset1_coords, subset2_coords, @@ -231,9 +252,9 @@ def set_rmsd_matrix_elements(tasks, coords, rmsdmat, masses, fit_coords=None, rotated_i = np.transpose(np.dot(rotamat, np.transpose(translated_i))) rmsdmat[(i + 1) * i / 2 + j] = PureRMSD( rotated_i.astype(np.float64), translated_j.astype(np.float64), - coords[j].shape[0], masses, summasses) + coords[j].shape[0], weights, sumweights) else: - raise TypeError("Both fit_coords and fit_masses must be specified \ + raise TypeError("Both fit_coords and fit_weights must be specified \ if one of them is given") @@ -243,7 +264,8 @@ def get_distance_matrix(ensemble, save_matrix=None, superimpose=True, superimposition_subset="name CA", - mass_weighted=True, + weights='mass', + mass_weighted=None, n_jobs=1, verbose=False, *conf_dist_args, @@ -281,9 +303,11 @@ def get_distance_matrix(ensemble, superimposition_subset : str, optional Group for superimposition using MDAnalysis selection syntax (default is CA atoms: "name CA") - mass_weighted : bool, optional - calculate a mass-weighted RMSD (default is True). If set to False + mass_weighted : bool, optional (deprecated) + calculate a mass-weighted RMSD (default is None). If set to False the superimposition will also not be mass-weighted. + weights : str/array_like, optional + weights to be used for fit. Can be either 'mass' or an array_like n_jobs : int, optional Maximum number of cores to be used (default is 1). If -1 use all cores. verbose : bool, optional @@ -324,10 +348,23 @@ def get_distance_matrix(ensemble, # Calculate the matrix else: + + if mass_weighted is not None: + warnings.warn("mass weighted is deprecated argument. Please use " + " 'weights=\"mass\" instead. Will be removed in 0.17.0", + category=DeprecationWarning) + if mass_weighted: + weights = 'mass' + + if weights == 'mass': + weight_type = 'Mass' + elif weights is None: + weight_type = 'None' + else: + weight_type = 'Custom' logging.info( " Perform pairwise alignment: {0}".format(str(superimpose))) - logging.info(" Mass-weighted alignment and RMSD: {0}" - .format(str(mass_weighted))) + logging.info(" weighted alignment and RMSD: {0}".format(weight_type)) if superimpose: logging.info( " Atoms subset for alignment: {0}" @@ -340,7 +377,7 @@ def get_distance_matrix(ensemble, conf_dist_function=set_rmsd_matrix_elements, selection=selection, pairwise_align=superimpose, - mass_weighted=mass_weighted, + weights=weights, n_jobs=n_jobs, verbose=verbose) diff --git a/package/MDAnalysis/analysis/encore/covariance.py b/package/MDAnalysis/analysis/encore/covariance.py index 1cceec3157c..c2e18e340cb 100644 --- a/package/MDAnalysis/analysis/encore/covariance.py +++ b/package/MDAnalysis/analysis/encore/covariance.py @@ -173,41 +173,34 @@ def shrinkage_covariance_estimator( coordinates, def covariance_matrix(ensemble, selection="name CA", estimator=shrinkage_covariance_estimator, - mass_weighted=True, + weights='mass', reference=None): """ Calculates (optionally mass weighted) covariance matrix Parameters ---------- - ensemble : Universe object The structural ensemble - - selection : str + selection : str (optional) Atom selection string in the MDAnalysis format. - - estimator : function + estimator : function (optional) Function that estimates the covariance matrix. It requires at least a "coordinates" numpy array (of shape (N,M,3), where N is the number of frames and M the number of atoms). See ml_covariance_estimator and shrinkage_covariance_estimator for reference. - - mass_weighted : bool - Whether to do a mass-weighted analysis (default is True) - - reference : MDAnalysis.Universe object + weights : str/array_like (optional) + specify weights. If ``'mass'`` then chose masses of ensemble atoms, if ``None`` chose uniform weights + reference : MDAnalysis.Universe object (optional) Use the distances to a specific reference structure rather than the distance to the mean. Returns ------- - cov_mat : numpy.array Covariance matrix """ - # Extract coordinates from ensemble coordinates = ensemble.trajectory.timeseries( ensemble.select_atoms(selection), @@ -231,16 +224,27 @@ def covariance_matrix(ensemble, sigma = estimator(coordinates, reference_coordinates) - # Optionally correct with mass-weighting - if mass_weighted: + # Optionally correct with weights + if weights: # Calculate mass-weighted covariance matrix - - if selection: - masses = np.repeat(ensemble.select_atoms(selection).masses, 3) + if weights == 'mass': + if selection: + weights = ensemble.select_atoms(selection).masses + else: + weights = ensemble.atoms.masses else: - masses = np.repeat(ensemble.atoms.masses, 3) - - mass_matrix = np.sqrt(np.identity(len(masses))*masses) - sigma = np.dot(mass_matrix, np.dot(sigma, mass_matrix)) + if selection: + req_len = ensemble.select_atoms(selection).n_atoms + else: + req_len = ensemble.atoms.n_atoms + if req_len != len(weights): + raise ValueError("number of weights is unequal to number of " + "atoms in ensemble") + + # broadcast to a (len(weights), 3) array + weights = np.repeat(weights, 3) + + weight_matrix = np.sqrt(np.identity(len(weights))*weights) + sigma = np.dot(weight_matrix, np.dot(sigma, weight_matrix)) return sigma diff --git a/package/MDAnalysis/analysis/encore/similarity.py b/package/MDAnalysis/analysis/encore/similarity.py index c7535868ceb..2b93d5f7c3d 100644 --- a/package/MDAnalysis/analysis/encore/similarity.py +++ b/package/MDAnalysis/analysis/encore/similarity.py @@ -737,7 +737,7 @@ def prepare_ensembles_for_convergence_increasing_window(ensemble, def hes(ensembles, selection="name CA", cov_estimator="shrinkage", - mass_weighted=True, + weights='mass', align=False, details=False, estimate_error=False, @@ -751,38 +751,28 @@ def hes(ensembles, Parameters ---------- - ensembles : list List of Universe objects for similarity measurements. - selection : str, optional Atom selection string in the MDAnalysis format. Default is "name CA" - cov_estimator : str, optional Covariance matrix estimator method, either shrinkage, `shrinkage`, or Maximum Likelyhood, `ml`. Default is shrinkage. - - mass_weighted : bool, optional - Whether to perform mass-weighted covariance matrix estimation - (default is True). - + weights : str/array_like, optional + specify optional weights. If ``mass`` then chose masses of ensemble atoms align : bool, optional Whether to align the ensembles before calculating their similarity. Note: this changes the ensembles in-place, and will thus leave your ensembles in an altered state. (default is False) - details : bool, optional Save the mean and covariance matrix for each ensemble in a numpy array (default is False). - estimate_error : bool, optional Whether to perform error estimation (default is False). - bootstrapping_samples : int, optional Number of times the similarity matrix will be bootstrapped (default is 100), only if estimate_error is True. - calc_diagonal : bool, optional Whether to calculate the diagonal of the similarity scores (i.e. the similarities of every ensemble against itself). @@ -790,13 +780,11 @@ def hes(ensembles, Returns ------- - numpy.array (bidimensional) Harmonic similarity measurements between each pair of ensembles. Notes ----- - The method assumes that each ensemble is derived from a multivariate normal distribution. The mean and covariance matrix are, thus, estimatated from the distribution of each ensemble and used for comparision by the @@ -865,7 +853,7 @@ def hes(ensembles, for ensemble in ensembles: mda.analysis.align.AlignTraj(ensemble, ensembles[0], select=selection, - mass_weighted=True, + weights=weights, in_memory=True).run() else: for ensemble in ensembles: @@ -915,7 +903,7 @@ def hes(ensembles, format=('fac')), axis=0).flatten()) sigmas.append(covariance_matrix(ensembles_list[i][t], - mass_weighted=True, + weights=weights, estimator=covariance_estimator, selection=selection)) @@ -947,7 +935,7 @@ def hes(ensembles, # Covariance matrices in each system sigmas.append(covariance_matrix(e, - mass_weighted=mass_weighted, + weights=weights, estimator=covariance_estimator, selection=selection)) diff --git a/package/MDAnalysis/analysis/hole.py b/package/MDAnalysis/analysis/hole.py index 7907681e26b..471b3d8a41e 100644 --- a/package/MDAnalysis/analysis/hole.py +++ b/package/MDAnalysis/analysis/hole.py @@ -111,7 +111,7 @@ u = mda.Universe(MULTIPDB_HOLE) # trajectory # calculate RMSD - R = RMSD(u, reference=ref, select="protein", mass_weighted=True) + R = RMSD(u, reference=ref, select="protein", weights='mass') R.run() # HOLE analysis with order parameters diff --git a/package/MDAnalysis/analysis/rms.py b/package/MDAnalysis/analysis/rms.py index b712a7034e8..8569441b88b 100644 --- a/package/MDAnalysis/analysis/rms.py +++ b/package/MDAnalysis/analysis/rms.py @@ -195,31 +195,25 @@ def rmsd(a, b, weights=None, center=False, superposition=False): if a.shape != b.shape: raise ValueError('a and b must have same shape') - if weights is not None: - if len(weights) != len(a): - raise ValueError('weights must have same length as a/b') - # weights are constructed as relative to the mean - relative_weights = np.asarray(weights) / np.mean(weights) - relative_weights = relative_weights.astype(np.float64) - else: - relative_weights = None - # superposition only works if structures are centered if center or superposition: # make copies (do not change the user data!) # weights=None is equivalent to all weights 1 - a = a - np.average(a, axis=0, weights=weights) - b = b - np.average(b, axis=0, weights=weights) + a -= np.average(a, axis=0, weights=weights) + b -= np.average(b, axis=0, weights=weights) + + if weights is not None: + if len(weights) != len(a): + raise ValueError('weights must have same length as a/b') + # weights are constructed as relative to the mean + weights = np.asarray(weights, dtype=np.float64) / np.mean(weights) if superposition: - if relative_weights is not None: - relative_weights = relative_weights.astype(np.float64) - return qcp.CalcRMSDRotationalMatrix(a, b, N, None, - relative_weights) + return qcp.CalcRMSDRotationalMatrix(a, b, N, None, weights) else: if weights is not None: - return np.sqrt(np.sum(relative_weights[:, np.newaxis] - * (( a - b ) ** 2)) / N) + return np.sqrt(np.sum(weights[:, np.newaxis] + * ((a - b) ** 2)) / N) else: return np.sqrt(np.sum((a - b) ** 2) / N) @@ -291,7 +285,8 @@ class RMSD(AnalysisBase): def __init__(self, atomgroup, reference=None, select='all', groupselections=None, filename="rmsd.dat", - mass_weighted=False, tol_mass=0.1, ref_frame=0, **kwargs): + mass_weighted=None, + weights=None, tol_mass=0.1, ref_frame=0, **kwargs): """Setting up the RMSD analysis. The RMSD will be computed between `select` and `reference` for @@ -333,10 +328,12 @@ def __init__(self, atomgroup, reference=None, select='all', .. Note:: Experimental feature. Only limited error checking implemented. - filename : str (optional) + filename : str, optional write RSMD into file file :meth:`RMSD.save` - mass_weighted : bool (optional) + mass_weighted : bool (deprecated) do a mass-weighted RMSD fit + weights : str/array_like (optional) + choose weights. If 'str' uses masses as weights tol_mass : float (optional) Reject match if the atomic masses for matched atoms differ by more than `tol_mass` @@ -361,7 +358,13 @@ def __init__(self, atomgroup, reference=None, select='all', select = process_selection(select) self.groupselections = ([process_selection(s) for s in groupselections] if groupselections is not None else []) - self.mass_weighted = mass_weighted + if mass_weighted is not None: + warnings.warn("mass weighted is deprecated argument. Please use " + " 'weights=\"mass\" instead. Will be removed in 0.17.0", + category=DeprecationWarning) + if mass_weighted: + weights = 'mass' + self.weights = weights self.tol_mass = tol_mass self.ref_frame = ref_frame self.filename = filename @@ -429,8 +432,10 @@ def __init__(self, atomgroup, reference=None, select='all', def _prepare(self): self._n_atoms = self.mobile_atoms.n_atoms - self._weights = ((self.ref_atoms.masses / self.ref_atoms.masses.mean()).astype(np.float64) - if self.mass_weighted else None) + if self.weights == 'mass': + self.weights = self.ref_atoms.masses + if self.weights is not None: + self.weights = (self.weights / self.weights.mean()).astype(np.float64) current_frame = self.reference.trajectory.ts.frame @@ -439,7 +444,7 @@ def _prepare(self): # (coordinates MUST be stored in case the ref traj is advanced # elsewhere or if ref == mobile universe) self.reference.trajectory[self.ref_frame] - self._ref_com = self.ref_atoms.center_of_mass() + self._ref_com = self.ref_atoms.center(self.weights) # makes a copy self._ref_coordinates = self.ref_atoms.positions - self._ref_com if self._groupselections_atoms: @@ -472,21 +477,22 @@ def _prepare(self): self._mobile_coordinates64 = self.mobile_atoms.positions.copy().astype(np.float64) def _single_frame(self): - mobile_com = self.mobile_atoms.center_of_mass().astype(np.float64) + mobile_com = self.mobile_atoms.center(self.weights).astype(np.float64) self._mobile_coordinates64[:] = self.mobile_atoms.positions self._mobile_coordinates64 -= mobile_com self.rmsd[self._frame_index, :2] = self._ts.frame, self._trajectory.time if self._groupselections_atoms: - # superimpose structures: MDAnalysis qcprot needs Nx3 coordinate array with float64 - # datatype (float32 leads to errors up to 1e-3 in RMSD). - # Note that R is defined in such a way that it acts **to the left** so that we can easily - # use broadcasting and save one expensive numpy transposition. + # superimpose structures: MDAnalysis qcprot needs Nx3 coordinate + # array with float64 datatype (float32 leads to errors up to 1e-3 in + # RMSD). Note that R is defined in such a way that it acts **to the + # left** so that we can easily use broadcasting and save one + # expensive numpy transposition. self.rmsd[self._frame_index, 2] = qcp.CalcRMSDRotationalMatrix( self._ref_coordinates_64, self._mobile_coordinates64, - self._n_atoms, self._rot, self._weights) + self._n_atoms, self._rot, self.weights) self._R[:, :] = self._rot.reshape(3, 3) # Transform each atom in the trajectory (use inplace ops to @@ -496,7 +502,7 @@ def _single_frame(self): # R acts to the left & is broadcasted N times. self._ts.positions[:,:] = (self._mobile_coordinates64[:] * - self._R) + self._R) self._ts.positions[:] += self._ref_com # 2) calculate secondary RMSDs @@ -505,13 +511,13 @@ def _single_frame(self): self._groupselections_atoms), 3): self.rmsd[self._frame_index, igroup] = qcp.CalcRMSDRotationalMatrix( refpos, atoms['mobile'].positions.astype(np.float64), - atoms['mobile'].n_atoms, None, self._weights) + atoms['mobile'].n_atoms, None, self.weights) else: # only calculate RMSD by setting the Rmatrix to None (no need # to carry out the rotation as we already get the optimum RMSD) self.rmsd[self._frame_index, 2] = qcp.CalcRMSDRotationalMatrix( self._ref_coordinates_64, self._mobile_coordinates64, - self._n_atoms, None, self._weights) + self._n_atoms, None, self.weights) self._pm.rmsd = self.rmsd[self._frame_index, 2] @@ -533,33 +539,8 @@ def save(self, filename=None): return filename -class RMSF(object): - """Class to perform RMSF analysis on a set of atoms across a trajectory. - - Run the analysis with :meth:`RMSF.run`, which stores the results - in the array :attr:`RMSF.rmsf`. - - This class performs no coordinate transforms; RMSFs are obtained from atom - coordinates as-is. - - .. versionadded:: 0.11.0 - """ - - def __init__(self, atomgroup): - """Calculate RMSF of given atoms across a trajectory. - - Parameters - ---------- - atomgroup : mda.AtomGroup - AtomGroup to obtain RMSF for - - """ - self.atomgroup = atomgroup - self._rmsf = None - - def run(self, start=None, stop=None, step=None, progout=10, - verbose=None, quiet=None): - """Calculate RMSF of given atoms across a trajectory. +class RMSF(AnalysisBase): + """Calculate RMSF of given atoms across a trajectory. This method implements an algorithm for computing sums of squares while avoiding overflows and underflows [Welford1962]_. @@ -586,44 +567,27 @@ def run(self, start=None, stop=None, step=None, progout=10, .. [Welford1962] B. P. Welford (1962). "Note on a Method for Calculating Corrected Sums of Squares and Products." Technometrics 4(3):419-420. + """ + def __init__(self, atomgroup, weights=None, **kwargs): + super(RMSF, self).__init__(atomgroup.universe.trajectory, **kwargs) + self.atomgroup = atomgroup + if weights == 'mass': + weights = self.atomgroup.masses + self.weights = weights - .. deprecated:: 0.16 - The keyword argument *quiet* is deprecated in favor of *verbose*. - """ - traj = self.atomgroup.universe.trajectory - start, stop, step = traj.check_slice_indices(start, stop, step) - sumsquares = np.zeros((self.atomgroup.n_atoms, 3)) - means = np.array(sumsquares) - - verbose = _set_verbose(verbose, quiet, default=True) - if not verbose: - progout = None - - # set up progress output - if progout: - percentage = ProgressMeter(self.atomgroup.universe.trajectory.n_frames, - interval=progout) - else: - percentage = ProgressMeter(self.atomgroup.universe.trajectory.n_frames, - verbose=False) - - for k, ts in enumerate(self.atomgroup.universe.trajectory[start:stop:step]): - sumsquares += (k/(k + 1.0)) * (self.atomgroup.positions - means)**2 - means = (k * means + self.atomgroup.positions)/(k + 1) + def _prepare(self): + self.sumsquares = np.zeros((self.atomgroup.n_atoms, 3)) + self.mean = self.sumsquares.copy() - percentage.echo(ts.frame) + def _single_frame(self): + k = self._frame_index + self.sumsquares += (k / (k+1.0)) * (self.atomgroup.positions - self.mean) ** 2 + self.mean = (k * self.mean + self.atomgroup.positions) / (k + 1) - rmsf = np.sqrt(sumsquares.sum(axis=1)/(k + 1)) + def _conclude(self): + k = self._frame_index + self.rmsf = np.sqrt(self.sumsquares.sum(axis=1) / (k + 1)) - if not (rmsf >= 0).all(): + if not (self.rmsf >= 0).all(): raise ValueError("Some RMSF values negative; overflow " + "or underflow occurred") - - self._rmsf = rmsf - - @property - def rmsf(self): - """RMSF data; only available after using :meth:`RMSF.run` - - """ - return self._rmsf diff --git a/testsuite/MDAnalysisTests/analysis/test_align.py b/testsuite/MDAnalysisTests/analysis/test_align.py index a26a853ce79..3ab49706aa8 100644 --- a/testsuite/MDAnalysisTests/analysis/test_align.py +++ b/testsuite/MDAnalysisTests/analysis/test_align.py @@ -20,6 +20,8 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # from __future__ import absolute_import, print_function +import warnings +import os.path import MDAnalysis import MDAnalysis.analysis.align as align @@ -33,11 +35,13 @@ import numpy as np from nose.plugins.attrib import attr -import os.path - from MDAnalysisTests.datafiles import PSF, DCD, FASTA from MDAnalysisTests import executable_not_found, parser_not_found, tempdir +# I want to catch all warnings in the tests. If this is not set at the start it +# could cause test that check for warnings to fail. +warnings.simplefilter('always') + class TestRotationMatrix(object): def __init__(self): @@ -104,9 +108,9 @@ def tearDown(self): def test_rmsd(self): self.universe.trajectory[0] # ensure first frame bb = self.universe.select_atoms('backbone') - first_frame = bb.positions + first_frame = bb.positions.copy() self.universe.trajectory[-1] - last_frame = bb.positions + last_frame = bb.positions.copy() assert_almost_equal(rms.rmsd(first_frame, first_frame), 0.0, 5, err_msg="error: rmsd(X,X) should be 0") # rmsd(A,B) = rmsd(B,A) should be exact but spurious failures in the @@ -120,14 +124,26 @@ def test_rmsd(self): assert_almost_equal(rmsd, 6.820321761927005, 5, err_msg="RMSD calculation between 1st and last " "AdK frame gave wrong answer") - #test mass_weighted + # test masses as weights last_atoms_weight = self.universe.atoms.masses A = self.universe.trajectory[0] B = self.reference.trajectory[-1] - rmsd = align.alignto(self.universe, self.reference, mass_weighted=True) - rmsd_sup_weight = rms.rmsd(A, B, weights=last_atoms_weight, center=True, superposition=True) + rmsd = align.alignto(self.universe, self.reference, weights='mass') + rmsd_sup_weight = rms.rmsd(A, B, weights=last_atoms_weight, + center=True, superposition=True) assert_almost_equal(rmsd[1], rmsd_sup_weight, 6) + def test_rmsd_deprecated(self): + last_atoms_weight = self.universe.atoms.masses + A = self.universe.trajectory[0] + B = self.reference.trajectory[-1] + with warnings.catch_warnings(record=True) as warn: + warnings.simplefilter('always') + rmsd = align.alignto(self.universe, self.reference, mass_weighted=True) + assert_equal(len(warn), 1) + rmsd_sup_weight = rms.rmsd(A, B, weights=last_atoms_weight, + center=True, superposition=True) + assert_almost_equal(rmsd[1], rmsd_sup_weight, 6) @dec.slow @attr('issue') @@ -170,8 +186,8 @@ def test_AlignTraj(self): rmsd_outfile = os.path.join(self.tempdir.name, 'rmsd') x.save(rmsd_outfile) - assert_almost_equal(x.rmsd[0], 6.929083044751061, decimal=3) - assert_almost_equal(x.rmsd[-1], 5.279731379771749336e-07, decimal=3) + assert_almost_equal(x.rmsd[0], 6.9290, decimal=3) + assert_almost_equal(x.rmsd[-1], 5.2797e-07, decimal=3) # RMSD against the reference frame # calculated on Mac OS X x86 with MDA 0.7.2 r689 @@ -186,10 +202,25 @@ def test_AlignTraj(self): def test_AlignTraj_weighted(self): x = align.AlignTraj(self.universe, self.reference, - filename=self.outfile, mass_weighted=True).run() + filename=self.outfile, weights='mass').run() + fitted = MDAnalysis.Universe(PSF, self.outfile) + assert_almost_equal(x.rmsd[0], 0, decimal=3) + assert_almost_equal(x.rmsd[-1], 6.9033, decimal=3) + + self._assert_rmsd(fitted, 0, 0.0, + weights=self.universe.atoms.masses) + self._assert_rmsd(fitted, -1, 6.929083032629219, + weights=self.universe.atoms.masses) + + def test_AlignTraj_weigts_deprecated(self): + with warnings.catch_warnings(record=True) as warn: + warnings.simplefilter('always') + x = align.AlignTraj(self.universe, self.reference, + filename=self.outfile, mass_weighted=True).run() + assert_equal(len(warn), 1) fitted = MDAnalysis.Universe(PSF, self.outfile) assert_almost_equal(x.rmsd[0], 0, decimal=3) - assert_almost_equal(x.rmsd[-1], 6.9033990467077579, decimal=3) + assert_almost_equal(x.rmsd[-1], 6.9033, decimal=3) self._assert_rmsd(fitted, 0, 0.0, weights=self.universe.atoms.masses) @@ -199,15 +230,15 @@ def test_AlignTraj_weighted(self): def test_AlignTraj_partial_fit(self): # fitting on a partial selection should still write the whole topology align.AlignTraj(self.universe, self.reference, select='resid 1-20', - filename=self.outfile, mass_weighted=True).run() + filename=self.outfile, weights='mass').run() MDAnalysis.Universe(PSF, self.outfile) def test_AlignTraj_in_memory(self): self.reference.trajectory[-1] x = align.AlignTraj(self.universe, self.reference, filename=self.outfile, in_memory=True).run() - assert_almost_equal(x.rmsd[0], 6.929083044751061, decimal=3) - assert_almost_equal(x.rmsd[-1], 5.279731379771749336e-07, decimal=3) + assert_almost_equal(x.rmsd[0], 6.9290, decimal=3) + assert_almost_equal(x.rmsd[-1], 5.2797e-07, decimal=3) # check in memory trajectory self._assert_rmsd(self.universe, 0, 6.929083044751061) diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py index 1edc7cd207e..d6b56dabad3 100644 --- a/testsuite/MDAnalysisTests/analysis/test_encore.py +++ b/testsuite/MDAnalysisTests/analysis/test_encore.py @@ -159,7 +159,7 @@ def test_rmsd_matrix_with_superimposition(self): encore.confdistmatrix.set_rmsd_matrix_elements, selection="name CA", pairwise_align=True, - mass_weighted=True, + weights='mass', n_jobs=1) reference = rms.RMSD(self.ens1, select = "name CA") @@ -182,7 +182,7 @@ def test_rmsd_matrix_without_superimposition(self): encore.confdistmatrix.set_rmsd_matrix_elements, selection=selection_string, pairwise_align=False, - mass_weighted=True, + weights='mass', n_jobs=1) print (repr(confdist_matrix.as_array()[0,:])) @@ -214,11 +214,11 @@ def test_ensemble_superimposition(): def test_ensemble_superimposition_to_reference_non_weighted(): aligned_ensemble1 = mda.Universe(PSF, DCD) align.AlignTraj(aligned_ensemble1, aligned_ensemble1, - select="name CA", mass_weighted=False, + select="name CA", in_memory=True).run() aligned_ensemble2 = mda.Universe(PSF, DCD) align.AlignTraj(aligned_ensemble2, aligned_ensemble2, - select="name *", mass_weighted=False, + select="name *", in_memory=True).run() rmsfs1 = rms.RMSF(aligned_ensemble1.select_atoms('name *')) @@ -239,7 +239,7 @@ def test_hes_to_self(self): err_msg="Harmonic Ensemble Similarity to itself not zero: {0:f}".format(result_value)) def test_hes(self): - results, details = encore.hes([self.ens1, self.ens2], mass_weighted=True) + results, details = encore.hes([self.ens1, self.ens2], weights='mass') result_value = results[0,1] min_bound = 1E5 self.assertGreater(result_value, min_bound, diff --git a/testsuite/MDAnalysisTests/analysis/test_rms.py b/testsuite/MDAnalysisTests/analysis/test_rms.py index d3dfb3da092..82072ced0fe 100644 --- a/testsuite/MDAnalysisTests/analysis/test_rms.py +++ b/testsuite/MDAnalysisTests/analysis/test_rms.py @@ -20,9 +20,12 @@ # J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 # from __future__ import print_function - from six.moves import range +import warnings +import os +import sys + import MDAnalysis import MDAnalysis as mda from MDAnalysis.analysis import rms, align @@ -33,13 +36,14 @@ import numpy as np -import os -import sys - from MDAnalysis.exceptions import SelectionError, NoDataError from MDAnalysisTests.datafiles import GRO, XTC, rmsfArray, PSF, DCD from MDAnalysisTests import tempdir, parser_not_found +# I want to catch all warnings in the tests. If this is not set at the start it +# could cause test that check for warnings to fail. +warnings.simplefilter('always') + class Testrmsd(object): @dec.skipif(parser_not_found('DCD'), @@ -142,6 +146,7 @@ def test_with_superposition_equal(self): rmsd_superposition = rms.rmsd(A, B, center=True, superposition=True) assert_almost_equal(rmsd, rmsd_superposition) + class TestRMSD(object): @dec.skipif(parser_not_found('DCD'), 'DCD parser not available. Are you using python 3?') @@ -151,7 +156,7 @@ def setUp(self): self.outfile = os.path.join(self.tempdir.name, 'rmsd.txt') self.correct_values = [[0, 0, 0], [49, 48.9999, 4.68953]] self.correct_values_group = [[0, 0, 0, 0, 0], - [49, 48.9999, 4.7857, 4.7002, + [49, 49, 4.7857, 4.7004, 4.68981]] def tearDown(self): @@ -185,7 +190,19 @@ def test_rmsd_single_frame(self): def test_mass_weighted_and_save(self): RMSD = MDAnalysis.analysis.rms.RMSD(self.universe, select='name CA', - step=49, mass_weighted=True).run() + step=49, weights='mass').run() + RMSD.save(self.outfile) + saved = np.loadtxt(self.outfile) + assert_array_almost_equal(RMSD.rmsd, saved, 4, + err_msg="error: rmsd profile should match " + + "test values") + + def test_mass_weighted_and_save_deprecated(self): + with warnings.catch_warnings(record=True) as warn: + warnings.simplefilter('always') + RMSD = MDAnalysis.analysis.rms.RMSD(self.universe, select='name CA', + step=49, mass_weighted=True).run() + assert_equal(len(warn), 1) RMSD.save(self.outfile) saved = np.loadtxt(self.outfile) assert_array_almost_equal(RMSD.rmsd, saved, 4, @@ -229,6 +246,7 @@ def test_save_before_run(self): RMSD = MDAnalysis.analysis.rms.RMSD(self.universe) RMSD.save('blah') + class TestRMSF(TestCase): def setUp(self): self.universe = MDAnalysis.Universe(GRO, XTC) @@ -241,7 +259,7 @@ def tearDown(self): def test_rmsf(self): rmsfs = MDAnalysis.analysis.rms.RMSF(self.universe.select_atoms('name CA')) - rmsfs.run(verbose=False) + rmsfs.run() test_rmsfs = np.load(rmsfArray) assert_almost_equal(rmsfs.rmsf, test_rmsfs, 5, @@ -249,8 +267,8 @@ def test_rmsf(self): "values") def test_rmsf_single_frame(self): - rmsfs = MDAnalysis.analysis.rms.RMSF(self.universe.select_atoms('name CA')) - rmsfs.run(start=5, stop=6, verbose=False) + rmsfs = MDAnalysis.analysis.rms.RMSF(self.universe.select_atoms('name CA'), start=5, stop=6) + rmsfs.run() assert_almost_equal(rmsfs.rmsf, 0, 5, err_msg="error: rmsfs should all be zero") @@ -263,7 +281,7 @@ def test_rmsf_identical_frames(self): self.universe = MDAnalysis.Universe(GRO, self.outfile) rmsfs = MDAnalysis.analysis.rms.RMSF(self.universe.select_atoms('name CA')) - rmsfs.run(verbose=False) + rmsfs.run() assert_almost_equal(rmsfs.rmsf, 0, 5, err_msg="error: rmsfs should all be 0")