diff --git a/package/MDAnalysis/analysis/encore/covariance.py b/package/MDAnalysis/analysis/encore/covariance.py index 1cceec3157c..4d5d619b0b7 100644 --- a/package/MDAnalysis/analysis/encore/covariance.py +++ b/package/MDAnalysis/analysis/encore/covariance.py @@ -173,41 +173,34 @@ def shrinkage_covariance_estimator( coordinates, def covariance_matrix(ensemble, selection="name CA", estimator=shrinkage_covariance_estimator, - mass_weighted=True, + weights=None, reference=None): """ Calculates (optionally mass weighted) covariance matrix Parameters ---------- - ensemble : Universe object The structural ensemble - - selection : str + selection : str (optional) Atom selection string in the MDAnalysis format. - - estimator : function + estimator : function (optional) Function that estimates the covariance matrix. It requires at least a "coordinates" numpy array (of shape (N,M,3), where N is the number of frames and M the number of atoms). See ml_covariance_estimator and shrinkage_covariance_estimator for reference. - - mass_weighted : bool - Whether to do a mass-weighted analysis (default is True) - - reference : MDAnalysis.Universe object + weights : str/array_like (optional) + specify optional weights. If ``mass`` then chose masses of ensemble atoms + reference : MDAnalysis.Universe object (optional) Use the distances to a specific reference structure rather than the distance to the mean. Returns ------- - cov_mat : numpy.array Covariance matrix """ - # Extract coordinates from ensemble coordinates = ensemble.trajectory.timeseries( ensemble.select_atoms(selection), @@ -231,16 +224,27 @@ def covariance_matrix(ensemble, sigma = estimator(coordinates, reference_coordinates) - # Optionally correct with mass-weighting - if mass_weighted: + # Optionally correct with weights + if weights: # Calculate mass-weighted covariance matrix - - if selection: - masses = np.repeat(ensemble.select_atoms(selection).masses, 3) + if weights == 'mass': + if selection: + weights = ensemble.select_atoms(selection).masses + else: + weights = ensemble.atoms.masses else: - masses = np.repeat(ensemble.atoms.masses, 3) - - mass_matrix = np.sqrt(np.identity(len(masses))*masses) - sigma = np.dot(mass_matrix, np.dot(sigma, mass_matrix)) + if selection: + req_len = ensemble.select_atoms(selection).n_atoms + else: + req_len = ensemble.atoms.n_atoms + if req_len != len(weights): + raise ValueError("number of weights is unequal to number of " + "atoms in ensemble") + + # broadcast to a (len(weights), 3) array + weights = np.repeat(weights, 3) + + weight_matrix = np.sqrt(np.identity(len(weights))*weights) + sigma = np.dot(weight_matrix, np.dot(sigma, weight_matrix)) return sigma diff --git a/package/MDAnalysis/analysis/encore/similarity.py b/package/MDAnalysis/analysis/encore/similarity.py index f77b294ae91..82dc9d6e495 100644 --- a/package/MDAnalysis/analysis/encore/similarity.py +++ b/package/MDAnalysis/analysis/encore/similarity.py @@ -737,7 +737,7 @@ def prepare_ensembles_for_convergence_increasing_window(ensemble, def hes(ensembles, selection="name CA", cov_estimator="shrinkage", - mass_weighted=True, + weights='mass', align=False, details=False, estimate_error=False, @@ -751,38 +751,28 @@ def hes(ensembles, Parameters ---------- - ensembles : list List of Universe objects for similarity measurements. - selection : str, optional Atom selection string in the MDAnalysis format. Default is "name CA" - cov_estimator : str, optional Covariance matrix estimator method, either shrinkage, `shrinkage`, or Maximum Likelyhood, `ml`. Default is shrinkage. - - mass_weighted : bool, optional - Whether to perform mass-weighted covariance matrix estimation - (default is True). - + weights : str/array_like, optional + specify optional weights. If ``mass`` then chose masses of ensemble atoms align : bool, optional Whether to align the ensembles before calculating their similarity. Note: this changes the ensembles in-place, and will thus leave your ensembles in an altered state. (default is False) - details : bool, optional Save the mean and covariance matrix for each ensemble in a numpy array (default is False). - estimate_error : bool, optional Whether to perform error estimation (default is False). - bootstrapping_samples : int, optional Number of times the similarity matrix will be bootstrapped (default is 100), only if estimate_error is True. - calc_diagonal : bool, optional Whether to calculate the diagonal of the similarity scores (i.e. the similarities of every ensemble against itself). @@ -790,13 +780,11 @@ def hes(ensembles, Returns ------- - numpy.array (bidimensional) Harmonic similarity measurements between each pair of ensembles. Notes ----- - The method assumes that each ensemble is derived from a multivariate normal distribution. The mean and covariance matrix are, thus, estimatated from the distribution of each ensemble and used for comparision by the @@ -865,7 +853,7 @@ def hes(ensembles, for ensemble in ensembles: mda.analysis.align.AlignTraj(ensemble, ensembles[0], select=selection, - mass_weighted=True, + weights=weights, in_memory=True).run() else: for ensemble in ensembles: @@ -915,7 +903,7 @@ def hes(ensembles, format=('fac')), axis=0).flatten()) sigmas.append(covariance_matrix(ensembles_list[i][t], - mass_weighted=True, + weights=weights, estimator=covariance_estimator, selection=selection)) @@ -947,7 +935,7 @@ def hes(ensembles, # Covariance matrices in each system sigmas.append(covariance_matrix(e, - mass_weighted=mass_weighted, + weights=weights, estimator=covariance_estimator, selection=selection)) diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py index c831aba0599..34a19430f74 100644 --- a/testsuite/MDAnalysisTests/analysis/test_encore.py +++ b/testsuite/MDAnalysisTests/analysis/test_encore.py @@ -98,24 +98,24 @@ def test_triangular_matrix(): assert_equal(triangular_matrix[0,1], expected_value, err_msg="Data error in TriangularMatrix: read/write are not consistent") - assert_equal(triangular_matrix[0,1], triangular_matrix[1,0], + assert_equal(triangular_matrix[0,1], triangular_matrix[1,0], err_msg="Data error in TriangularMatrix: matrix non symmetrical") triangular_matrix.savez(filename) triangular_matrix_2 = encore.utils.TriangularMatrix(size = size, loadfile = filename) - assert_equal(triangular_matrix_2[0,1], expected_value, + assert_equal(triangular_matrix_2[0,1], expected_value, err_msg="Data error in TriangularMatrix: loaded matrix non symmetrical") triangular_matrix_3 = encore.utils.TriangularMatrix(size = size) triangular_matrix_3.loadz(filename) - assert_equal(triangular_matrix_3[0,1], expected_value, + assert_equal(triangular_matrix_3[0,1], expected_value, err_msg="Data error in TriangularMatrix: loaded matrix non symmetrical") incremented_triangular_matrix = triangular_matrix + scalar assert_equal(incremented_triangular_matrix[0,1], expected_value + scalar, err_msg="Error in TriangularMatrix: addition of scalar gave\ inconsistent results") - + triangular_matrix += scalar assert_equal(triangular_matrix[0,1], expected_value + scalar, err_msg="Error in TriangularMatrix: addition of scalar gave\ @@ -149,7 +149,7 @@ def function(x): assert_equal(r[1], arguments[i][0]**2, err_msg="Unexpeted results from ParallelCalculation") - def test_rmsd_matrix_with_superimposition(self): + def test_rmsd_matrix_with_superimposition(self): conf_dist_matrix = encore.confdistmatrix.conformational_distance_matrix(self.ens1, encore.confdistmatrix.set_rmsd_matrix_elements, selection = "name CA", @@ -162,7 +162,7 @@ def test_rmsd_matrix_with_superimposition(self): for i,rmsd in enumerate(reference.rmsd): assert_almost_equal(conf_dist_matrix[0,i], rmsd[2], decimal=3, - err_msg = "calculated RMSD values differ from the reference implementation") + err_msg = "calculated RMSD values differ from the reference implementation") def test_rmsd_matrix_without_superimposition(self): selection_string = "name CA" @@ -209,11 +209,11 @@ def test_ensemble_superimposition(): def test_ensemble_superimposition_to_reference_non_weighted(): aligned_ensemble1 = mda.Universe(PSF, DCD) align.AlignTraj(aligned_ensemble1, aligned_ensemble1, - select="name CA", mass_weighted=False, + select="name CA", in_memory=True).run() aligned_ensemble2 = mda.Universe(PSF, DCD) align.AlignTraj(aligned_ensemble2, aligned_ensemble2, - select="name *", mass_weighted=False, + select="name *", in_memory=True).run() rmsfs1 = rms.RMSF(aligned_ensemble1.select_atoms('name *')) @@ -234,7 +234,7 @@ def test_hes_to_self(self): err_msg="Harmonic Ensemble Similarity to itself not zero: {0:f}".format(result_value)) def test_hes(self): - results, details = encore.hes([self.ens1, self.ens2], mass_weighted=True) + results, details = encore.hes([self.ens1, self.ens2], weights='mass') result_value = results[0,1] min_bound = 1E5 self.assertGreater(result_value, min_bound, @@ -264,7 +264,7 @@ def test_ces(self): expected_value = 0.51 assert_almost_equal(result_value, expected_value, decimal=2, err_msg="Unexpected value for Cluster Ensemble Similarity: {0:f}. Expected {1:f}.".format(result_value, expected_value)) - + @dec.skipif(module_not_found('scipy'), "Test skipped because scipy is not available.") def test_dres_to_self(self): @@ -295,7 +295,7 @@ def test_dres_without_superimposition(self): expected_value = 0.68 assert_almost_equal(result_value, expected_value, decimal=1, err_msg="Unexpected value for Dim. reduction Ensemble Similarity: {0:f}. Expected {1:f}.".format(result_value, expected_value)) - + def test_ces_convergence(self): expected_values = [0.3443593, 0.1941854, 0.06857104, 0.] results = encore.ces_convergence(self.ens1, 5) @@ -793,4 +793,3 @@ def test_dimensionality_reduction_two_different_methods(self): method=[encore.StochasticProximityEmbeddingNative(dims[0]), encore.PrincipalComponentAnalysis(dims[1])]) assert_equal(coordinates[1].shape[0], dims[1]) -