Skip to content

Commit

Permalink
Change encore function expect conf distance
Browse files Browse the repository at this point in the history
This changes the methods in encore to the new arbitrary weights system. The last
one conf_distance_matrix does need more refactoring work to simplify the code
before I change it as well.
  • Loading branch information
kain88-de committed Dec 31, 2016
1 parent b893c8f commit a07fcae
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 52 deletions.
48 changes: 26 additions & 22 deletions package/MDAnalysis/analysis/encore/covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,41 +173,34 @@ def shrinkage_covariance_estimator( coordinates,
def covariance_matrix(ensemble,
selection="name CA",
estimator=shrinkage_covariance_estimator,
mass_weighted=True,
weights=None,
reference=None):
"""
Calculates (optionally mass weighted) covariance matrix
Parameters
----------
ensemble : Universe object
The structural ensemble
selection : str
selection : str (optional)
Atom selection string in the MDAnalysis format.
estimator : function
estimator : function (optional)
Function that estimates the covariance matrix. It requires at least
a "coordinates" numpy array (of shape (N,M,3), where N is the number
of frames and M the number of atoms). See ml_covariance_estimator and
shrinkage_covariance_estimator for reference.
mass_weighted : bool
Whether to do a mass-weighted analysis (default is True)
reference : MDAnalysis.Universe object
weights : str/array_like (optional)
specify optional weights. If ``mass`` then chose masses of ensemble atoms
reference : MDAnalysis.Universe object (optional)
Use the distances to a specific reference structure rather than the
distance to the mean.
Returns
-------
cov_mat : numpy.array
Covariance matrix
"""

# Extract coordinates from ensemble
coordinates = ensemble.trajectory.timeseries(
ensemble.select_atoms(selection),
Expand All @@ -231,16 +224,27 @@ def covariance_matrix(ensemble,
sigma = estimator(coordinates, reference_coordinates)


# Optionally correct with mass-weighting
if mass_weighted:
# Optionally correct with weights
if weights:
# Calculate mass-weighted covariance matrix

if selection:
masses = np.repeat(ensemble.select_atoms(selection).masses, 3)
if weights == 'mass':
if selection:
weights = ensemble.select_atoms(selection).masses
else:
weights = ensemble.atoms.masses
else:
masses = np.repeat(ensemble.atoms.masses, 3)

mass_matrix = np.sqrt(np.identity(len(masses))*masses)
sigma = np.dot(mass_matrix, np.dot(sigma, mass_matrix))
if selection:
req_len = ensemble.select_atoms(selection).n_atoms
else:
req_len = ensemble.atoms.n_atoms
if req_len != len(weights):
raise ValueError("number of weights is unequal to number of "
"atoms in ensemble")

# broadcast to a (len(weights), 3) array
weights = np.repeat(weights, 3)

weight_matrix = np.sqrt(np.identity(len(weights))*weights)
sigma = np.dot(weight_matrix, np.dot(sigma, weight_matrix))

return sigma
24 changes: 6 additions & 18 deletions package/MDAnalysis/analysis/encore/similarity.py
Original file line number Diff line number Diff line change
Expand Up @@ -737,7 +737,7 @@ def prepare_ensembles_for_convergence_increasing_window(ensemble,
def hes(ensembles,
selection="name CA",
cov_estimator="shrinkage",
mass_weighted=True,
weights='mass',
align=False,
details=False,
estimate_error=False,
Expand All @@ -751,52 +751,40 @@ def hes(ensembles,
Parameters
----------
ensembles : list
List of Universe objects for similarity measurements.
selection : str, optional
Atom selection string in the MDAnalysis format. Default is "name CA"
cov_estimator : str, optional
Covariance matrix estimator method, either shrinkage, `shrinkage`,
or Maximum Likelyhood, `ml`. Default is shrinkage.
mass_weighted : bool, optional
Whether to perform mass-weighted covariance matrix estimation
(default is True).
weights : str/array_like, optional
specify optional weights. If ``mass`` then chose masses of ensemble atoms
align : bool, optional
Whether to align the ensembles before calculating their similarity.
Note: this changes the ensembles in-place, and will thus leave your
ensembles in an altered state.
(default is False)
details : bool, optional
Save the mean and covariance matrix for each
ensemble in a numpy array (default is False).
estimate_error : bool, optional
Whether to perform error estimation (default is False).
bootstrapping_samples : int, optional
Number of times the similarity matrix will be bootstrapped (default
is 100), only if estimate_error is True.
calc_diagonal : bool, optional
Whether to calculate the diagonal of the similarity scores
(i.e. the similarities of every ensemble against itself).
If this is False (default), 0.0 will be used instead.
Returns
-------
numpy.array (bidimensional)
Harmonic similarity measurements between each pair of ensembles.
Notes
-----
The method assumes that each ensemble is derived from a multivariate normal
distribution. The mean and covariance matrix are, thus, estimatated from
the distribution of each ensemble and used for comparision by the
Expand Down Expand Up @@ -865,7 +853,7 @@ def hes(ensembles,
for ensemble in ensembles:
mda.analysis.align.AlignTraj(ensemble, ensembles[0],
select=selection,
mass_weighted=True,
weights=weights,
in_memory=True).run()
else:
for ensemble in ensembles:
Expand Down Expand Up @@ -915,7 +903,7 @@ def hes(ensembles,
format=('fac')),
axis=0).flatten())
sigmas.append(covariance_matrix(ensembles_list[i][t],
mass_weighted=True,
weights=weights,
estimator=covariance_estimator,
selection=selection))

Expand Down Expand Up @@ -947,7 +935,7 @@ def hes(ensembles,

# Covariance matrices in each system
sigmas.append(covariance_matrix(e,
mass_weighted=mass_weighted,
weights=weights,
estimator=covariance_estimator,
selection=selection))

Expand Down
23 changes: 11 additions & 12 deletions testsuite/MDAnalysisTests/analysis/test_encore.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,24 +98,24 @@ def test_triangular_matrix():
assert_equal(triangular_matrix[0,1], expected_value,
err_msg="Data error in TriangularMatrix: read/write are not consistent")

assert_equal(triangular_matrix[0,1], triangular_matrix[1,0],
assert_equal(triangular_matrix[0,1], triangular_matrix[1,0],
err_msg="Data error in TriangularMatrix: matrix non symmetrical")
triangular_matrix.savez(filename)

triangular_matrix_2 = encore.utils.TriangularMatrix(size = size, loadfile = filename)
assert_equal(triangular_matrix_2[0,1], expected_value,
assert_equal(triangular_matrix_2[0,1], expected_value,
err_msg="Data error in TriangularMatrix: loaded matrix non symmetrical")

triangular_matrix_3 = encore.utils.TriangularMatrix(size = size)
triangular_matrix_3.loadz(filename)
assert_equal(triangular_matrix_3[0,1], expected_value,
assert_equal(triangular_matrix_3[0,1], expected_value,
err_msg="Data error in TriangularMatrix: loaded matrix non symmetrical")

incremented_triangular_matrix = triangular_matrix + scalar
assert_equal(incremented_triangular_matrix[0,1], expected_value + scalar,
err_msg="Error in TriangularMatrix: addition of scalar gave\
inconsistent results")

triangular_matrix += scalar
assert_equal(triangular_matrix[0,1], expected_value + scalar,
err_msg="Error in TriangularMatrix: addition of scalar gave\
Expand Down Expand Up @@ -149,7 +149,7 @@ def function(x):
assert_equal(r[1], arguments[i][0]**2,
err_msg="Unexpeted results from ParallelCalculation")

def test_rmsd_matrix_with_superimposition(self):
def test_rmsd_matrix_with_superimposition(self):
conf_dist_matrix = encore.confdistmatrix.conformational_distance_matrix(self.ens1,
encore.confdistmatrix.set_rmsd_matrix_elements,
selection = "name CA",
Expand All @@ -162,7 +162,7 @@ def test_rmsd_matrix_with_superimposition(self):

for i,rmsd in enumerate(reference.rmsd):
assert_almost_equal(conf_dist_matrix[0,i], rmsd[2], decimal=3,
err_msg = "calculated RMSD values differ from the reference implementation")
err_msg = "calculated RMSD values differ from the reference implementation")

def test_rmsd_matrix_without_superimposition(self):
selection_string = "name CA"
Expand Down Expand Up @@ -209,11 +209,11 @@ def test_ensemble_superimposition():
def test_ensemble_superimposition_to_reference_non_weighted():
aligned_ensemble1 = mda.Universe(PSF, DCD)
align.AlignTraj(aligned_ensemble1, aligned_ensemble1,
select="name CA", mass_weighted=False,
select="name CA",
in_memory=True).run()
aligned_ensemble2 = mda.Universe(PSF, DCD)
align.AlignTraj(aligned_ensemble2, aligned_ensemble2,
select="name *", mass_weighted=False,
select="name *",
in_memory=True).run()

rmsfs1 = rms.RMSF(aligned_ensemble1.select_atoms('name *'))
Expand All @@ -234,7 +234,7 @@ def test_hes_to_self(self):
err_msg="Harmonic Ensemble Similarity to itself not zero: {0:f}".format(result_value))

def test_hes(self):
results, details = encore.hes([self.ens1, self.ens2], mass_weighted=True)
results, details = encore.hes([self.ens1, self.ens2], weights='mass')
result_value = results[0,1]
min_bound = 1E5
self.assertGreater(result_value, min_bound,
Expand Down Expand Up @@ -264,7 +264,7 @@ def test_ces(self):
expected_value = 0.51
assert_almost_equal(result_value, expected_value, decimal=2,
err_msg="Unexpected value for Cluster Ensemble Similarity: {0:f}. Expected {1:f}.".format(result_value, expected_value))

@dec.skipif(module_not_found('scipy'),
"Test skipped because scipy is not available.")
def test_dres_to_self(self):
Expand Down Expand Up @@ -295,7 +295,7 @@ def test_dres_without_superimposition(self):
expected_value = 0.68
assert_almost_equal(result_value, expected_value, decimal=1,
err_msg="Unexpected value for Dim. reduction Ensemble Similarity: {0:f}. Expected {1:f}.".format(result_value, expected_value))

def test_ces_convergence(self):
expected_values = [0.3443593, 0.1941854, 0.06857104, 0.]
results = encore.ces_convergence(self.ens1, 5)
Expand Down Expand Up @@ -793,4 +793,3 @@ def test_dimensionality_reduction_two_different_methods(self):
method=[encore.StochasticProximityEmbeddingNative(dims[0]),
encore.PrincipalComponentAnalysis(dims[1])])
assert_equal(coordinates[1].shape[0], dims[1])

0 comments on commit a07fcae

Please sign in to comment.