From a2d6488969dda8092e40a752eb8950f0e2aba707 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Wed, 6 May 2015 17:23:41 +0200 Subject: [PATCH 1/7] Prototype for a customisable back-end selection module. ```` # 32-bit on the GPU. from sknn.backend import gpu32 # 64-bit on the CPU. from sknn.backend import cpu64 ```` Specifying the number of bits is optional. --- sknn/__init__.py | 44 +++++++++++++++++++++++++++++++++++++++++++- sknn/mlp.py | 10 ---------- 2 files changed, 43 insertions(+), 11 deletions(-) diff --git a/sknn/__init__.py b/sknn/__init__.py index 0307f4e..f84e2ba 100644 --- a/sknn/__init__.py +++ b/sknn/__init__.py @@ -1,4 +1,46 @@ -from __future__ import (absolute_import, unicode_literals) +# -*- coding: utf-8 -*- +from __future__ import (absolute_import, unicode_literals, print_function) __author__ = 'ssamot, alexjc' __version__ = '0.1' + + +import os +import sys +import logging + + +class TheanoConfigurator(object): + + def __init__(self): + self.configured = False + + def configure(self, flags): + if self.configured or 'theano' in sys.modules: + return + + os.environ.setdefault('THEANO_FLAGS', flags+',print_active_device=False') + print('flags', flags) + cuda = logging.getLogger('theano.sandbox.cuda') + cuda.setLevel(logging.CRITICAL) + import theano + cuda.setLevel(logging.WARNING) + + self.configured = True + + def __getattr__(self, name): + flags = '' + if name.endswith('32'): + flags = ',floatX=float32' + if name.endswith('64'): + flags = ',floatX=float32' + + if name.startswith('cpu'): + return self.configure('device=cpu'+flags) + if name.startswith('gpu'): + return self.configure('device=gpu'+flags) + + return getattr(sys.modules['sknn'], name) + + +sys.modules['sknn.backend'] = TheanoConfigurator() diff --git a/sknn/mlp.py b/sknn/mlp.py index 174a692..ab2c9c2 100644 --- a/sknn/mlp.py +++ b/sknn/mlp.py @@ -11,16 +11,6 @@ log = logging.getLogger('sknn') -# By default, we force Theano to use a GPU and fallback to CPU, using 32-bits. 
-# This must be done in the code before Theano is imported for the first time. -os.environ['THEANO_FLAGS'] = "device=gpu,floatX=float32" - -cuda = logging.getLogger('theano.sandbox.cuda') -cuda.setLevel(logging.CRITICAL) -import theano -cuda.setLevel(logging.WARNING) - - import numpy import sklearn.base import sklearn.pipeline From 67e8808503d61c73bb6350813b061cb3cc580e40 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Wed, 6 May 2015 23:46:45 +0200 Subject: [PATCH 2/7] Improving the logging for the backend configuration module, fix for import now missing. --- sknn/__init__.py | 7 ++++++- sknn/mlp.py | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/sknn/__init__.py b/sknn/__init__.py index f84e2ba..17693af 100644 --- a/sknn/__init__.py +++ b/sknn/__init__.py @@ -16,7 +16,12 @@ def __init__(self): self.configured = False def configure(self, flags): - if self.configured or 'theano' in sys.modules: + if self.configured is True: + return + + if 'theano' in sys.modules: + log = logging.getLogger('sknn') + log.warn('Theano was already imported and cannot be reconfigured.') return os.environ.setdefault('THEANO_FLAGS', flags+',print_active_device=False') diff --git a/sknn/mlp.py b/sknn/mlp.py index ab2c9c2..e87648a 100644 --- a/sknn/mlp.py +++ b/sknn/mlp.py @@ -12,6 +12,7 @@ import numpy +import theano import sklearn.base import sklearn.pipeline import sklearn.preprocessing From 127bf60bf2d6be2fda1b112cca2eb174ade56969 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Thu, 7 May 2015 17:23:01 +0200 Subject: [PATCH 3/7] Further configuration of theano via the `backend` module, logging what's going on. 
--- .coveragerc | 2 ++ build | 5 +++++ examples/plot_mlp.py | 12 +++++++----- sknn/__init__.py | 10 +++++++--- sknn/dataset.py | 4 +--- 5 files changed, 22 insertions(+), 11 deletions(-) create mode 100644 .coveragerc create mode 100755 build diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..0dd597b --- /dev/null +++ b/.coveragerc @@ -0,0 +1,2 @@ +[run] +omit=*pywrap2* diff --git a/build b/build new file mode 100755 index 0000000..fc573b8 --- /dev/null +++ b/build @@ -0,0 +1,5 @@ +#!/bin/sh +/usr/local/bin/virtualenv -p /usr/local/bin/python2.7 venv --clear --no-site-packages +venv/bin/python setup.py sdist +/usr/local/bin/twine upload -r test dist/*.tar.gz +venv/bin/pip install -i https://testpypi.python.org/pypi scikit-neuralnetwork diff --git a/examples/plot_mlp.py b/examples/plot_mlp.py index 4ddd3f1..4e9ee09 100644 --- a/examples/plot_mlp.py +++ b/examples/plot_mlp.py @@ -11,8 +11,8 @@ import logging import argparse import itertools -import numpy as np +import numpy from matplotlib import pyplot as plt from matplotlib.colors import ListedColormap @@ -24,8 +24,10 @@ import logging logging.basicConfig(format="%(message)s", level=logging.WARNING, stream=sys.stdout) +from sknn.backend import gpu32 from sknn import mlp + # All possible parameter options that can be plotted, separately or combined. PARAMETERS = { 'activation': ['Rectifier', 'Tanh', 'Sigmoid', 'Maxout'], @@ -74,7 +76,7 @@ seed = int(time.time()) X, y = make_classification(n_features=2, n_redundant=0, n_informative=2, random_state=0, n_clusters_per_class=1) -rng = np.random.RandomState(seed+1) +rng = numpy.random.RandomState(seed+1) X += 2 * rng.uniform(size=X.shape) linearly_separable = (X, y) @@ -94,8 +96,8 @@ # Prepare coordinates of 2D grid to be visualized. 
x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5 y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5 - xx, yy = np.meshgrid(np.arange(x_min, x_max, GRID_RESOLUTION), - np.arange(y_min, y_max, GRID_RESOLUTION)) + xx, yy = numpy.meshgrid(numpy.arange(x_min, x_max, GRID_RESOLUTION), + numpy.arange(y_min, y_max, GRID_RESOLUTION)) # Plot the dataset on its own first. cm = plt.cm.get_cmap("PRGn") @@ -118,7 +120,7 @@ # Plot the decision boundary. For that, we will assign a color to each # point in the mesh [x_min, m_max]x[y_min, y_max]. - Z = clf.predict_proba(np.c_[xx.ravel(), yy.ravel()])[:, 1] + Z = clf.predict_proba(numpy.c_[xx.ravel(), yy.ravel()])[:, 1] # Put the result into a color plot Z = Z.reshape(xx.shape) diff --git a/sknn/__init__.py b/sknn/__init__.py index 17693af..fb1e380 100644 --- a/sknn/__init__.py +++ b/sknn/__init__.py @@ -14,24 +14,28 @@ class TheanoConfigurator(object): def __init__(self): self.configured = False + self.log = logging.getLogger('sknn') def configure(self, flags): if self.configured is True: return if 'theano' in sys.modules: - log = logging.getLogger('sknn') - log.warn('Theano was already imported and cannot be reconfigured.') + self.log.warning('Theano was already imported and cannot be reconfigured.') return os.environ.setdefault('THEANO_FLAGS', flags+',print_active_device=False') - print('flags', flags) cuda = logging.getLogger('theano.sandbox.cuda') cuda.setLevel(logging.CRITICAL) import theano + import theano.sandbox.cuda as cd cuda.setLevel(logging.WARNING) self.configured = True + try: + self.log.info('Using device gpu%i: %s', cd.active_device_number(), cd.active_device_name()) + except AttributeError: + self.log.info('Using device cpu0, with %r.', theano.config.floatX) def __getattr__(self, name): flags = '' diff --git a/sknn/dataset.py b/sknn/dataset.py index d4954cf..7e1894a 100644 --- a/sknn/dataset.py +++ b/sknn/dataset.py @@ -7,9 +7,7 @@ from pylearn2.utils.iteration import (FiniteDatasetIterator, 
resolve_iterator_class) import functools - import theano -floatX = theano.config.floatX class SparseDesignMatrix(Dataset): @@ -83,7 +81,7 @@ def iterator(self, mode=None, batch_size=None, num_batches=None, sub_spaces = space.components sub_sources = source - conv_fn = lambda x: x.todense().astype(floatX) + conv_fn = lambda x: x.todense().astype(theano.config.floatX) convert = [] for sp, src in safe_zip(sub_spaces, sub_sources): convert.append(conv_fn if src in ('features', 'targets') else None) From 41a89cc979442c6033826b87f00028506cb88383 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Thu, 7 May 2015 18:00:53 +0200 Subject: [PATCH 4/7] Adding a new User Guide section explaining the `backend` pseudo-module. Updated the readme accordingly. --- .coveragerc | 2 -- README.rst | 4 ++-- build | 5 ----- docs/guide.rst | 17 +++++++++++++++++ sknn/__init__.py | 2 +- 5 files changed, 20 insertions(+), 10 deletions(-) delete mode 100644 .coveragerc delete mode 100755 build diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 0dd597b..0000000 --- a/.coveragerc +++ /dev/null @@ -1,2 +0,0 @@ -[run] -omit=*pywrap2* diff --git a/README.rst b/README.rst index 3c6f113..dee5b94 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ scikit-neuralnetwork ==================== -Deep neural network implementation without the learning cliff! This library implements multi-layer perceptrons as a wrapper for the powerful ``pylearn2`` library that's compatible with ``scikit-learn`` for a more user-friendly and Pythonic interface. Oh, and it runs on your GPU by default. +Deep neural network implementation without the learning cliff! This library implements multi-layer perceptrons as a wrapper for the powerful ``pylearn2`` library that's compatible with ``scikit-learn`` for a more user-friendly and Pythonic interface. **NOTE**: This project is possible thanks to the `nucl.ai Conference `_ on **July 20-22**. Join us in **Vienna**! 
@@ -19,7 +19,7 @@ Thanks to the underlying ``pylearn2`` implementation, this library supports the * Linear: ``Linear``, ``Gaussian``, ``Softmax``. * **Layer Types —** ``Convolution`` (greyscale and color, 2D), ``Dense`` (standard, 1D). * **Learning Rules —** ``sgd``, ``momentum``, ``nesterov``, ``adadelta``, ``rmsprop``. -* **Dataset Types —** ``numpy.ndarray``, coming soon ``scipy.sparse``. +* **Dataset Types —** ``numpy.ndarray``, ``scipy.sparse``, coming soon: iterators. If a feature you need is missing, consider opening a `GitHub Issue `_ with a detailed explanation about the use case and we'll see what we can do. diff --git a/build b/build deleted file mode 100755 index fc573b8..0000000 --- a/build +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/sh -/usr/local/bin/virtualenv -p /usr/local/bin/python2.7 venv --clear --no-site-packages -venv/bin/python setup.py sdist -/usr/local/bin/twine upload -r test dist/*.tar.gz -venv/bin/pip install -i https://testpypi.python.org/pypi scikit-neuralnetwork diff --git a/docs/guide.rst b/docs/guide.rst index 4cb31ea..c930915 100644 --- a/docs/guide.rst +++ b/docs/guide.rst @@ -126,3 +126,20 @@ Here's how to setup such a pipeline with a multi-layer perceptron as a classifie pipeline.fit(X_train, y_train) You can thes use the pipeline as you would the neural network, or any other standard API from scikit-learn. + + +GPU Backend +----------- + +To set up the library to use your GPU or CPU explicitly in 32-bit or 64-bit mode, you can use the ``backend`` pseudo-module. It's a syntactic helper to set up ``THEANO_FLAGS`` in a Pythonic way, for example: + +.. code:: python + + # Use the GPU in 32-bit mode, falling back otherwise. + from sknn.backend import gpu32 + + # Use the CPU in 64-bit mode. + from sknn.backend import cpu64 + + +WARNING: This will only work if your program has not yet imported the ``theano`` module, due to the way the library is designed. If ``THEANO_FLAGS`` are set on the command-line, they are not overridden. 
diff --git a/sknn/__init__.py b/sknn/__init__.py index fb1e380..754e498 100644 --- a/sknn/__init__.py +++ b/sknn/__init__.py @@ -28,11 +28,11 @@ def configure(self, flags): cuda = logging.getLogger('theano.sandbox.cuda') cuda.setLevel(logging.CRITICAL) import theano - import theano.sandbox.cuda as cd cuda.setLevel(logging.WARNING) self.configured = True try: + import theano.sandbox.cuda as cd self.log.info('Using device gpu%i: %s', cd.active_device_number(), cd.active_device_name()) except AttributeError: self.log.info('Using device cpu0, with %r.', theano.config.floatX) From e78b46b515c205a0ff5cff2546929e8dd9042d0c Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Thu, 7 May 2015 19:57:57 +0200 Subject: [PATCH 5/7] Added some unit tests for the backend configurator. --- sknn/__init__.py | 2 +- sknn/tests/test_backend.py | 43 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 sknn/tests/test_backend.py diff --git a/sknn/__init__.py b/sknn/__init__.py index 754e498..2b3ae56 100644 --- a/sknn/__init__.py +++ b/sknn/__init__.py @@ -42,7 +42,7 @@ def __getattr__(self, name): if name.endswith('32'): flags = ',floatX=float32' if name.endswith('64'): - flags = ',floatX=float32' + flags = ',floatX=float64' if name.startswith('cpu'): return self.configure('device=cpu'+flags) diff --git a/sknn/tests/test_backend.py b/sknn/tests/test_backend.py new file mode 100644 index 0000000..1f16304 --- /dev/null +++ b/sknn/tests/test_backend.py @@ -0,0 +1,43 @@ +import unittest +from nose.tools import (assert_in, assert_equal) + +import os +import sys + +import sknn + + +class TestBackendPseudoModule(unittest.TestCase): + + def setUp(self): + if 'THEANO_FLAGS' in os.environ: + del os.environ['THEANO_FLAGS'] + for name in sys.modules.keys(): + if name.startswith('theano'): + del sys.modules[name] + sys.modules['sknn.backend'].configured = False + + def test_TheanoWarning(self): + pass + + def _check(self, flags): + 
assert_in('THEANO_FLAGS', os.environ) + variable = os.environ['THEANO_FLAGS'] + for f in flags: + assert_in(f, variable) + + def test_FlagsGPU32(self): + from sknn.backend import gpu32 + self._check(['floatX=float32','device=gpu']) + + def test_FlagsCPU32(self): + from sknn.backend import cpu32 + self._check(['floatX=float32','device=cpu']) + + def test_FlagsGPU64(self): + from sknn.backend import gpu64 + self._check(['floatX=float64','device=gpu']) + + def test_FlagsCPU64(self): + from sknn.backend import cpu64 + self._check(['floatX=float64','device=cpu']) From 7085c254c4ca0c1996bd3011ba1266558b4156e0 Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Thu, 7 May 2015 20:10:49 +0200 Subject: [PATCH 6/7] Another check to test for Theano import warning, fix for other broken tests by reimporting theano. --- .gitignore | 2 ++ sknn/__init__.py | 4 ++-- sknn/tests/test_backend.py | 20 +++++++++++++++++++- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 482942b..66cc806 100644 --- a/.gitignore +++ b/.gitignore @@ -43,8 +43,10 @@ pip-delete-this-directory.txt htmlcov/ .tox/ .coverage +.coveragerc .coverage.* .cache +.settings nosetests.xml coverage.xml *,cover diff --git a/sknn/__init__.py b/sknn/__init__.py index 2b3ae56..a31ad2d 100644 --- a/sknn/__init__.py +++ b/sknn/__init__.py @@ -19,7 +19,8 @@ def __init__(self): def configure(self, flags): if self.configured is True: return - + self.configured = True + if 'theano' in sys.modules: self.log.warning('Theano was already imported and cannot be reconfigured.') return @@ -30,7 +31,6 @@ def configure(self, flags): import theano cuda.setLevel(logging.WARNING) - self.configured = True try: import theano.sandbox.cuda as cd self.log.info('Using device gpu%i: %s', cd.active_device_number(), cd.active_device_name()) diff --git a/sknn/tests/test_backend.py b/sknn/tests/test_backend.py index 1f16304..6bbe781 100644 --- a/sknn/tests/test_backend.py +++ 
b/sknn/tests/test_backend.py @@ -1,8 +1,10 @@ import unittest from nose.tools import (assert_in, assert_equal) +import io import os import sys +import logging import sknn @@ -12,13 +14,29 @@ class TestBackendPseudoModule(unittest.TestCase): def setUp(self): if 'THEANO_FLAGS' in os.environ: del os.environ['THEANO_FLAGS'] + + self.removed = {} for name in sys.modules.keys(): if name.startswith('theano'): + self.removed[name] = sys.modules[name] del sys.modules[name] sys.modules['sknn.backend'].configured = False + self.buf = io.StringIO() + self.hnd = logging.StreamHandler(self.buf) + logging.getLogger('sknn').addHandler(self.hnd) + logging.getLogger().setLevel(logging.WARNING) + + def tearDown(self): + for name, module in self.removed.items(): + sys.modules[name] = module + logging.getLogger('sknn').removeHandler(self.hnd) + def test_TheanoWarning(self): - pass + import theano + from sknn.backend import cpu + assert_equal('Theano was already imported and cannot be reconfigured.\n', + self.buf.getvalue()) def _check(self, flags): assert_in('THEANO_FLAGS', os.environ) From 97eb138394a10b7ef64c257adde6109c5c58d80b Mon Sep 17 00:00:00 2001 From: "Alex J. Champandard" Date: Thu, 7 May 2015 20:47:53 +0200 Subject: [PATCH 7/7] Fix for the Python3 tests, restoring modules as it expects them. 
--- .gitignore | 1 + sknn/tests/test_backend.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 66cc806..31ea801 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,7 @@ htmlcov/ .tox/ .coverage .coveragerc +build .coverage.* .cache .settings diff --git a/sknn/tests/test_backend.py b/sknn/tests/test_backend.py index 6bbe781..63cf846 100644 --- a/sknn/tests/test_backend.py +++ b/sknn/tests/test_backend.py @@ -14,9 +14,11 @@ class TestBackendPseudoModule(unittest.TestCase): def setUp(self): if 'THEANO_FLAGS' in os.environ: del os.environ['THEANO_FLAGS'] - + + import theano + self.removed = {} - for name in sys.modules.keys(): + for name in list(sys.modules.keys()): if name.startswith('theano'): self.removed[name] = sys.modules[name] del sys.modules[name]