This repository has been archived by the owner on Jul 10, 2021. It is now read-only.

Commit

Merge pull request #61 from aigamedev/logging
Automatically configuring logger if it has not been set up
alexjc committed May 23, 2015
2 parents 84fbaea + 65aaf82 commit 8341b72
Showing 5 changed files with 96 additions and 60 deletions.
19 changes: 3 additions & 16 deletions docs/guide_intermediate.rst
@@ -4,9 +4,7 @@ Misc. Additions
Verbose Mode
------------

To see the output of the neural network's training, you need to configure two things: first setting up the Python logger (mandatory), and secondly specifying a verbose mode if you want more information during training (optional).

The first step is to configure either the ``sknn`` logger specifically, or do so globally (easier) as follows:
To see the output of the neural network's training, configure the Python logger called ``sknn`` or the default root logger. This is possible using the standard ``logging`` module, which you can set up as follows:

.. code:: python
@@ -18,20 +16,9 @@ The first step is to configure either the ``sknn`` logger specifically, or do so
                level=logging.DEBUG,
                stream=sys.stdout)
Then you can optionally create your neural networks using an additional ``verbose`` parameter to show the output during training:

.. code:: python
    from sknn.mlp import Regressor, Layer

    nn = Regressor(
        layers=[Layer("Linear")],
        n_iter=20,
        verbose=True,
        valid_size=0.25)
    nn.fit(X, y)
Change the log level to ``logging.INFO`` for less information about each epoch, or to ``logging.WARNING`` to receive only messages about problems or failures.
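
For example, a quieter setup might look like this (a minimal sketch using the same standard ``logging`` module shown above):

.. code:: python

    import sys
    import logging

    # Per-epoch summaries without the full debug detail.
    logging.basicConfig(
                format="%(message)s",
                level=logging.INFO,
                stream=sys.stdout)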

This code will output a table containing validation scores at each of the twenty epochs. The ``valid_size`` parameter is a ratio of the data to be used internally for validation; in short, the ``fit()`` function automatically splits the data into ``X_train`` and ``y_train`` as well as ``X_valid`` and ``y_valid``.
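
Conceptually, that split behaves like the following sketch (an illustration of the ratio only, not necessarily the library's exact internal call):

.. code:: python

    from sklearn.cross_validation import train_test_split

    # valid_size=0.25 reserves a quarter of the data for validation.
    X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.25)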
Using the flag ``verbose=True`` on either :class:`sknn.mlp.Classifier` or :class:`sknn.mlp.Regressor` will set up a default logger at ``DEBUG`` level if none exists, and ``verbose=False`` will set up a default logger at ``WARNING`` level if no logging has been configured.
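
For instance, assuming no logging has been configured beforehand (a minimal sketch of the behaviour described above):

.. code:: python

    from sknn.mlp import Regressor, Layer

    nn = Regressor(layers=[Layer("Linear")], verbose=True)   # adds a DEBUG handler
    nn = Regressor(layers=[Layer("Linear")], verbose=False)  # adds a WARNING handler
    nn = Regressor(layers=[Layer("Linear")], verbose=None)   # leaves logging untouched (default)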


Saving & Loading
32 changes: 18 additions & 14 deletions examples/bench_cifar10.py
@@ -3,23 +3,28 @@

import sys
import pickle
import logging
import numpy as np

logging.basicConfig(format="%(message)s", level=logging.DEBUG, stream=sys.stdout)
import numpy as np

PRETRAIN = False


def load(name):
    # Pickle module isn't backwards compatible. Hack so it works:
    compat = {'encoding': 'latin1'} if sys.version_info[0] == 3 else {}

    print("\t"+name)
    try:
        with open(name, 'rb') as f:
            return pickle.load(f) # , encoding='latin1')
            return pickle.load(f, **compat)
    except IOError:
        import gzip
        with gzip.open(name+'.gz', 'rb') as f:
            return pickle.load(f) # , encoding='latin1')
            return pickle.load(f, **compat)


# Download and extract Python data for CIFAR10 manually from here:
# http://www.cs.toronto.edu/~kriz/cifar.html

print("Loading...")
dataset1 = load('data_batch_1')
@@ -38,18 +43,16 @@ def load(name):
n_feat = data_train.shape[1]
n_targets = labels_train.max() + 1

import sys
import logging
logging.basicConfig(format="%(message)s", level=logging.DEBUG, stream=sys.stdout)

from sknn import mlp

nn = mlp.Classifier(
    layers=[
        mlp.Layer("Sigmoid", units=128),
        mlp.Layer("Sigmoid", units=128),
        mlp.Layer("Tanh", units=n_feat*2/3),
        mlp.Layer("Sigmoid", units=n_feat*1/3),
        mlp.Layer("Softmax", units=n_targets)],
    n_iter=4,
    n_stable=4,
    n_iter=50,
    n_stable=10,
    learning_rate=0.001,
    valid_size=0.5,
    verbose=1)
@@ -58,8 +61,8 @@ def load(name):
from sknn import ae
ae = ae.AutoEncoder(
    layers=[
        ae.Layer("Sigmoid", units=128),
        ae.Layer("Sigmoid", units=128)],
        ae.Layer("Tanh", units=n_feat*2/3),
        ae.Layer("Sigmoid", units=n_feat*2/3)],
    learning_rate=0.002,
    n_iter=10,
    verbose=1)
@@ -68,6 +71,7 @@ def load(name):

nn.fit(data_train, labels_train)


from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

9 changes: 2 additions & 7 deletions examples/bench_mnist.py
@@ -3,7 +3,6 @@

import sys
import time
import logging
import numpy as np

if len(sys.argv) == 1:
@@ -12,8 +11,6 @@

np.set_printoptions(precision=4)
np.set_printoptions(suppress=True)
logging.basicConfig(format="%(message)s", level=logging.DEBUG, stream=sys.stdout)


from sklearn.base import clone
from sklearn.cross_validation import train_test_split
@@ -52,8 +49,7 @@
    valid_size=0.0,
    n_stable=10,
    n_iter=10,
    verbose=1,
    )
    verbose=True)
classifiers.append(('sknn.mlp', clf))

if 'lasagne' in sys.argv:
@@ -83,8 +79,7 @@
    batch_iterator_train=BatchIterator(batch_size=25),

    max_epochs=10,
    verbose=1
    )
    verbose=1)
classifiers.append(('nolearn.lasagne', clf))


69 changes: 47 additions & 22 deletions sknn/nn.py
@@ -4,6 +4,7 @@
__all__ = ['Regressor', 'Classifier', 'Layer', 'Convolution']

import os
import sys
import time
import logging
import itertools
@@ -316,8 +317,10 @@ class NeuralNetwork(object):
    loss_type: string, optional
        The cost function to use when training the network. There are two valid options:
        * ``mse`` — Use mean squared error, for learning to predict the mean of the data.
        * ``mae`` — Use mean absolute error, for learning to predict the median of the data.
        The default option is ``mse``, and ``mae`` can only be applied to layers of type
        ``Linear`` or ``Gaussian`` and they must be used as the output layer.
@@ -327,9 +330,16 @@ class NeuralNetwork(object):
        be caught more effectively. Default is off.

    verbose: bool, optional
        If True, print the score at each epoch via the logger called 'sknn'. You can
        control the detail of the output by customising the logger level and formatter.
        The default is off.
        How to initialize the logging to display the results during training. If there is
        already a logger initialized, either ``sknn`` or the root logger, then this
        setting does nothing. Otherwise:
        * ``False`` — Set up a new logger that shows only warnings and errors.
        * ``True`` — Set up a new logger that displays all debug messages.
        * ``None`` — Don't set up a new logger under any condition (default).
        Using the built-in Python ``logging`` module, you can control the detail and style of
        output by customising the verbosity level and formatter for the ``sknn`` logger.
    """

    def __init__(
@@ -350,7 +360,7 @@ def __init__(
            valid_size=0.0,
            loss_type='mse',
            debug=False,
            verbose=False,
            verbose=None,
            **params):

        self.layers = []
@@ -391,7 +401,9 @@ def __init__(
        self.loss_type = loss_type
        self.debug = debug
        self.verbose = verbose


        self._create_logger()

        assert self.regularize in (None, 'L1', 'L2', 'dropout'),\
            "Unknown type of regularization specified: %s." % self.regularize
        assert self.loss_type in ('mse', 'mae'),\
@@ -432,6 +444,20 @@ def is_convolution(self):
"""
return isinstance(self.layers[0], Convolution)

    def _create_logger(self):
        # If users have configured logging already, assume they know best.
        if len(log.handlers) > 0 or len(log.parent.handlers) > 0 or self.verbose is None:
            return

        # Otherwise set up a default handler and formatter based on verbosity.
        lvl = logging.DEBUG if self.verbose else logging.WARNING
        fmt = logging.Formatter("%(message)s")
        hnd = logging.StreamHandler(stream=sys.stdout)

        hnd.setFormatter(fmt)
        hnd.setLevel(lvl)
        log.addHandler(hnd)

    def _create_matrix_input(self, X, y=None):
        if self.is_convolution:
            # Using `b01c` arrangement of data, see this for details:
@@ -478,23 +504,22 @@ def _train_layer(self, trainer, layer, dataset):
            layer.monitor.report_epoch()
            layer.monitor()

            if self.verbose:
                objective = layer.monitor.channels.get('objective', None)
                if objective:
                    avg_valid_error = objective.val_shared.get_value()
                    best_valid_error = min(best_valid_error, avg_valid_error)
                else:
                    # 'objective' channel is only defined with validation set.
                    avg_valid_error = None

                best_valid = bool(best_valid_error == avg_valid_error)
                log.debug("{:>5} {}{}{} {:>3.1f}s".format(
                    i,
                    ansi.GREEN if best_valid else "",
                    "{:>10.6f}".format(float(avg_valid_error)) if avg_valid_error else " N/A ",
                    ansi.ENDC if best_valid else "",
                    time.time() - start
                ))
            objective = layer.monitor.channels.get('objective', None)
            if objective:
                avg_valid_error = objective.val_shared.get_value()
                best_valid_error = min(best_valid_error, avg_valid_error)
            else:
                # 'objective' channel is only defined with validation set.
                avg_valid_error = None

            best_valid = bool(best_valid_error == avg_valid_error)
            log.debug("{:>5} {}{}{} {:>3.1f}s".format(
                i,
                ansi.GREEN if best_valid else "",
                "{:>10.6f}".format(float(avg_valid_error)) if avg_valid_error else " N/A ",
                ansi.ENDC if best_valid else "",
                time.time() - start
            ))

            if not trainer.continue_learning(layer):
                log.debug("")
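The precedence rule in ``_create_logger`` can be summarised with a short sketch (an illustration only, assuming the module-level ``sknn`` logger used throughout this patch):

import sys
import logging

# Handlers configured by the user, on either the 'sknn' logger or the root
# logger, take precedence: sknn then skips its own handler setup entirely.
logging.basicConfig(format="%(message)s", level=logging.INFO, stream=sys.stdout)

from sknn.mlp import Regressor, Layer
nn = Regressor(layers=[Layer("Linear")], verbose=True)
assert len(logging.getLogger('sknn').handlers) == 0  # user configuration wins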
27 changes: 26 additions & 1 deletion sknn/tests/test_training.py
@@ -1,5 +1,5 @@
import unittest
from nose.tools import (assert_in, assert_raises)
from nose.tools import (assert_in, assert_raises, assert_equals)

import io
import logging
@@ -34,6 +34,31 @@ def test_FitAutomaticValidation(self):
        self.nn._fit(a_in, a_out)


class TestCustomLogging(unittest.TestCase):

    def setUp(self):
        self.log = logging.getLogger('sknn')
        self.log.handlers = []
        self.backup, self.log.parent.handlers = self.log.parent.handlers, []

    def tearDown(self):
        self.log.parent.handlers = self.backup

    def test_DefaultLogVerbose(self):
        nn = MLPR(layers=[L("Linear")], verbose=True)
        assert_equals(1, len(self.log.handlers))
        assert_equals(logging.DEBUG, self.log.handlers[0].level)

    def test_DefaultLogQuiet(self):
        nn = MLPR(layers=[L("Linear")], verbose=False)
        assert_equals(1, len(self.log.handlers))
        assert_equals(logging.WARNING, self.log.handlers[0].level)

    def test_VerboseNoneNoLog(self):
        nn = MLPR(layers=[L("Linear")], verbose=None)
        assert_equals(0, len(self.log.handlers))

class TestTrainingOutput(unittest.TestCase):

    def setUp(self):
