Skip to content
This repository has been archived by the owner on Sep 11, 2023. It is now read-only.

Commit

Permalink
Merge pull request #1242 from marscher/minor
Browse files Browse the repository at this point in the history
Bugfixes and export_to_hdf5
  • Loading branch information
marscher authored Feb 9, 2018
2 parents 4173ebe + 0f74323 commit 3f13a09
Show file tree
Hide file tree
Showing 44 changed files with 628 additions and 489 deletions.
7 changes: 7 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ env:
- CONDA_PY=3.5
- CONDA_PY=3.6

matrix:
exclude: # test only 2.7 on osx.
- env: CONDA_PY=3.5
os: osx
- os: osx
env: CONDA_PY=3.6

before_install:
- source devtools/ci/travis/install_miniconda.sh

Expand Down
9 changes: 9 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,12 @@ def add_np(doctest_namespace):
np.set_printoptions(legacy='1.13')
except TypeError:
pass


@pytest.fixture(autouse=True)
def filter_warnings():
    """Auto-applied fixture: suppress the 'no features selected' warning.

    Snapshots the active warning filters, installs an 'ignore' rule for the
    featurizer's "plain coordinates" message, and restores the snapshot on
    teardown so the rule cannot leak into other tests.
    """
    import warnings
    old_filters = warnings.filters[:]
    warnings.filterwarnings(
        'ignore',
        message='You have not selected any features. Returning plain coordinates.')
    try:
        yield
    finally:
        # Restore even if an exception is thrown into the generator at the
        # yield point — the original code skipped restoration in that case,
        # leaking the filter into subsequent tests.
        warnings.filters = old_filters
5 changes: 3 additions & 2 deletions devtools/conda-recipe/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ requirements:
- python
- scipy
- setuptools
- gcc # [ not win ]
- toolchain

run:
- bhmm >=0.6,<0.7
- decorator >=4.0.0
- h5py
- libgcc # [linux or osx]
- matplotlib
- mdtraj
- mock # TODO: remove when py3k only.
Expand All @@ -59,6 +59,7 @@ test:
requires:
- pytest
- pytest-cov
- coverage
# TODO: disabled on win64, until https://bugs.python.org/issue31701 is fixed.
- pytest-faulthandler # [not win]
- pytest-xdist
Expand Down
1 change: 1 addition & 0 deletions devtools/conda-recipe/run_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"{njobs_args} "
"--junit-xml={junit_xml} "
"-c {pytest_cfg}"
#"--durations=20 "
.format(test_pkg=test_pkg, cover_pkg=cover_pkg,
junit_xml=junit_xml, pytest_cfg='setup.cfg',
dest_report=os.path.join(os.path.expanduser('~/'), 'coverage.xml'),
Expand Down
2 changes: 1 addition & 1 deletion doc/source/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ your Python installation to at least version 3.5 to catch future updates.
extracted per iteration from a data source. This is invariant to the dimension of data sets. #1190
- datasets: added Prinz potential (quadwell). #1226
- coordinates: added VAMP estimator. #1237

- coordinates: added method 'write_to_hdf5' for easy exporting streams to HDF5. #1242

- References:

Expand Down
4 changes: 0 additions & 4 deletions pyemma/_base/loggable.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,6 @@ def logger(self):
self.__create_logger()
return self._logger_instance

@property
def _logger(self):
return self.logger

def _logger_is_active(self, level):
""" @param level: int log level (debug=10, info=20, warn=30, error=40, critical=50)"""
return self.logger.level >= level
Expand Down
3 changes: 2 additions & 1 deletion pyemma/_base/serialization/serialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,8 @@ def _get_interpolation_map(cls):
return map

def save(self, file_name, model_name='default', overwrite=False, save_streaming_chain=False):
r"""
r""" saves the current state of this object to given file and name.
Parameters
-----------
file_name: str
Expand Down
11 changes: 7 additions & 4 deletions pyemma/coordinates/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,16 @@ def _check_old_chunksize_arg(chunksize, chunk_size_default, **kw):
chosen_chunk_size = chunksize
else:
import warnings
from pyemma.util.annotators import get_culprit
filename, lineno = get_culprit(3)
if is_default: # case 2.
warnings.warn('Passed deprecated argument "chunk_size", please use "chunksize"',
category=_PyEMMA_DeprecationWarning)
warnings.warn_explicit('Passed deprecated argument "chunk_size", please use "chunksize"',
category=_PyEMMA_DeprecationWarning, filename=filename, lineno=lineno)
chosen_chunk_size = kw.pop('chunk_size') # remove this argument to avoid further passing to other funcs.
else: # case 3.
warnings.warn('Passed two values for chunk size: "chunk_size" and "chunksize", while the first one'
' is deprecated. Please use "chunksize" in the future.', category=_PyEMMA_DeprecationWarning)
warnings.warn_explicit('Passed two values for chunk size: "chunk_size" and "chunksize", while the first one'
' is deprecated. Please use "chunksize" in the future.',
category=_PyEMMA_DeprecationWarning, filename=filename, lineno=lineno)
chosen_chunk_size = chunksize
assert chosen_chunk_size is not NotImplemented
return chosen_chunk_size
Expand Down
3 changes: 2 additions & 1 deletion pyemma/coordinates/clustering/include/bits/kmeans_bits.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,11 @@ KMeans<dtype>::cluster(const np_array &np_chunk, const np_array &np_centers, int
}
#else
{
std::mutex mutex;

std::vector<scoped_thread> threads;
threads.reserve(static_cast<std::size_t>(n_threads));

std::mutex mutex;
std::size_t grainSize = n_frames / n_threads;

auto worker = [&](std::size_t tid, std::size_t begin, std::size_t end, std::mutex& m) {
Expand Down
4 changes: 2 additions & 2 deletions pyemma/coordinates/clustering/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,11 +293,11 @@ def save_dtrajs(self, trajfiles=None, prefix='',

for filename, dtraj in zip(output_files, self.dtrajs):
dest = path.join(output_dir, filename)
self._logger.debug('writing dtraj to "%s"' % dest)
self.logger.debug('writing dtraj to "%s"' % dest)
try:
if path.exists(dest) and not self.overwrite_dtrajs:
raise EnvironmentError('Attempted to write dtraj "%s" which already existed. To automatically'
' overwrite existing files, set source.overwrite_dtrajs=True.' % dest)
write_dtraj(dest, dtraj)
except IOError:
self._logger.exception('Exception during writing dtraj to "%s"' % dest)
self.logger.exception('Exception during writing dtraj to "%s"' % dest)
14 changes: 7 additions & 7 deletions pyemma/coordinates/clustering/kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def _estimate(self, iterable, **kw):
first_chunk = False
self.initial_centers_ = self.clustercenters[:]

self._logger.debug("Accumulated all data, running kmeans on %s", self._in_memory_chunks.shape)
self.logger.debug("Accumulated all data, running kmeans on %s", self._in_memory_chunks.shape)
self._in_memory_chunks_set = True
else:
if len(self.clustercenters) != self.n_clusters:
Expand All @@ -240,11 +240,11 @@ def _estimate(self, iterable, **kw):
callback)
if code == 0:
self._converged = True
self._logger.info("Cluster centers converged after %i steps.", iterations + 1)
self.logger.info("Cluster centers converged after %i steps.", iterations + 1)
else:
self._logger.info("Algorithm did not reach convergence criterion"
self.logger.info("Algorithm did not reach convergence criterion"
" of %g in %i iterations. Consider increasing max_iter.",
self.tolerance, self.max_iter)
self.tolerance, self.max_iter)
self._finish_estimate()

return self
Expand Down Expand Up @@ -272,7 +272,7 @@ def _init_estimate(self):
total_length = sum(traj_lengths)
if not self.n_clusters:
self.n_clusters = min(int(math.sqrt(total_length)), 5000)
self._logger.info("The number of cluster centers was not specified, "
self.logger.info("The number of cluster centers was not specified, "
"using min(sqrt(N), 5000)=%s as n_clusters." % self.n_clusters)
from pyemma.coordinates.data import DataInMemory
if not isinstance(self, MiniBatchKmeansClustering) and not isinstance(self.data_producer, DataInMemory):
Expand Down Expand Up @@ -431,7 +431,7 @@ def _estimate(self, iterable, **kw):

if rel_change <= self.tolerance:
self._converged = True
self._logger.info("Cluster centers converged after %i steps.", i_pass + 1)
self.logger.info("Cluster centers converged after %i steps.", i_pass + 1)
self._progress_force_finish(stage=1)
else:
self._progress_update(1, stage=1)
Expand All @@ -441,6 +441,6 @@ def _estimate(self, iterable, **kw):
self._finish_estimate()

if not self._converged:
self._logger.info("Algorithm did not reach convergence criterion"
self.logger.info("Algorithm did not reach convergence criterion"
" of %g in %i iterations. Consider increasing max_iter.", self.tolerance, self.max_iter)
return self
8 changes: 4 additions & 4 deletions pyemma/coordinates/clustering/tests/test_assign.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,8 @@ def test_assignment_multithread(self):
# re-do assignment with multiple threads and compare results
chunksize = 1000

assignment_mp = coor.assign_to_centers(self.X, self.centers_big, n_jobs=2, chunk_size=chunksize)
assignment_sp = coor.assign_to_centers(self.X, self.centers_big, n_jobs=1, chunk_size=chunksize)
assignment_mp = coor.assign_to_centers(self.X, self.centers_big, n_jobs=2, chunksize=chunksize)
assignment_sp = coor.assign_to_centers(self.X, self.centers_big, n_jobs=1, chunksize=chunksize)

np.testing.assert_equal(assignment_mp, assignment_sp)

Expand All @@ -252,8 +252,8 @@ def test_assignment_multithread_minrsmd(self):
).reshape((N_centers, -1))
chunksize = 1000

assignment_mp = coor.assign_to_centers(reader, centers, n_jobs=2, chunk_size=chunksize, metric='minRMSD')
assignment_sp = coor.assign_to_centers(reader, centers, n_jobs=1, chunk_size=chunksize, metric='minRMSD')
assignment_mp = coor.assign_to_centers(reader, centers, n_jobs=2, chunksize=chunksize, metric='minRMSD')
assignment_sp = coor.assign_to_centers(reader, centers, n_jobs=1, chunksize=chunksize, metric='minRMSD')

np.testing.assert_equal(assignment_mp, assignment_sp)

Expand Down
6 changes: 3 additions & 3 deletions pyemma/coordinates/clustering/uniform_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,17 @@ def _estimate(self, iterable, **kw):
traj_lengths = self.trajectory_lengths(stride=self.stride, skip=self.skip)
total_length = sum(traj_lengths)
self.n_clusters = min(int(math.sqrt(total_length)), 5000)
self._logger.info("The number of cluster centers was not specified, "
self.logger.info("The number of cluster centers was not specified, "
"using min(sqrt(N), 5000)=%s as n_clusters." % self.n_clusters)

# initialize time counters
T = iterable.n_frames_total(stride=self.stride, skip=self.skip)
if self.n_clusters > T:
self.n_clusters = T
self._logger.info('Requested more clusters (k = %i'
self.logger.info('Requested more clusters (k = %i'
' than there are total data points %i)'
'. Will do clustering with k = %i'
% (self.n_clusters, T, T))
% (self.n_clusters, T, T))

# first data point in the middle of the time segment
next_t = (T // self.n_clusters) // 2
Expand Down
47 changes: 47 additions & 0 deletions pyemma/coordinates/data/_base/_in_memory_mixin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@

class InMemoryMixin(object):
    """Adds an ``in_memory`` switch that caches a data source's output.

    Flipping :attr:`in_memory` to True pulls the complete output of the
    iterable/datasource into memory (stored in ``_Y``) and wraps it in a
    ``DataInMemory`` reader (``_Y_source``); flipping it back to False
    releases both again.
    """

    __serialize_version = 0
    __serialize_fields = ('_in_memory', '_Y', '_Y_source')

    def __init__(self):
        super(InMemoryMixin, self).__init__()
        self._in_memory = False               # is the output currently cached?
        self._mapping_to_mem_active = False   # guard flag set while get_output runs
        self._Y = None                        # cached output
        self._Y_source = None                 # DataInMemory reader built around _Y

    @property
    def in_memory(self):
        r"""are results stored in memory?"""
        return self._in_memory

    @in_memory.setter
    def in_memory(self, op_in_mem):
        r"""
        If set to True, the output will be stored in memory.
        """
        currently = self.in_memory
        # Act only on an actual state change; re-assigning the current
        # state is a no-op.
        if op_in_mem and not currently:
            self._map_to_memory()
        elif currently and not op_in_mem:
            self._clear_in_memory()

    def _clear_in_memory(self):
        # Drop the cached output and the reader wrapping it.
        self._Y = None
        self._Y_source = None
        self._in_memory = False

    def _map_to_memory(self, stride=1):
        r"""Maps results to memory. Will be stored in attribute :attr:`_Y`."""
        self._mapping_to_mem_active = True
        try:
            self._Y = self.get_output(stride=stride)
            from pyemma.coordinates.data import DataInMemory
            self._Y_source = DataInMemory(self._Y)
        finally:
            # Always reset the guard, even if get_output raised.
            self._mapping_to_mem_active = False

        self._in_memory = True
Loading

0 comments on commit 3f13a09

Please sign in to comment.