Merge pull request mila-iqia#1035 from dwf/saner_use_bias_push
Saner use_bias push behaviour (and tied_biases for ConvolutionalSequence)
dwf committed Mar 29, 2016
2 parents 83079b7 + ccdbc1b commit c39b3ad
Showing 6 changed files with 81 additions and 13 deletions.
19 changes: 14 additions & 5 deletions blocks/bricks/conv.py
@@ -89,7 +89,7 @@ def _allocate(self):
         add_role(W, FILTER)
         self.parameters.append(W)
         self.add_auxiliary_variable(W.norm(2), name='W_norm')
-        if self.use_bias:
+        if getattr(self, 'use_bias', True):
             if self.tied_biases:
                 b = shared_floatx_nans((self.num_filters,), name='b')
             else:
@@ -142,7 +142,7 @@ def apply(self, input_):
             border_mode=self.border_mode,
             filter_shape=((self.num_filters, self.num_channels) +
                           self.filter_size))
-        if self.use_bias:
+        if getattr(self, 'use_bias', True):
             if self.tied_biases:
                 output += self.b.dimshuffle('x', 0, 'x', 'x')
             else:
@@ -414,6 +414,10 @@ class ConvolutionalSequence(Sequence, Initializable, Feedforward):
         need to rely on either a default border mode (usually valid)
         or one provided at construction and/or after construction
         (but before allocation).
+    tied_biases : bool, optional
+        Same meaning as in :class:`Convolutional`. Defaults to ``None``,
+        in which case no value is pushed to child :class:`Convolutional`
+        bricks.

     Notes
     -----
@@ -423,6 +427,9 @@ class ConvolutionalSequence(Sequence, Initializable, Feedforward):
     input dimensions of a layer to the output dimensions of the previous
     layer by the :meth:`~.Brick.push_allocation_config` method.

+    The push behaviour of `tied_biases` mirrors that of `use_bias` or any
+    initialization configuration: only an explicitly specified value is
+    pushed down the hierarchy. `border_mode` also has this behaviour.

     The reason the `border_mode` parameter behaves the way it does is that
     pushing a single default `border_mode` makes it very difficult to
     have child bricks with different border modes. Normally, such things
@@ -434,7 +441,7 @@ class ConvolutionalSequence(Sequence, Initializable, Feedforward):
     """
     @lazy(allocation=['num_channels'])
     def __init__(self, layers, num_channels, batch_size=None, image_size=None,
-                 border_mode=None, tied_biases=False, **kwargs):
+                 border_mode=None, tied_biases=None, **kwargs):
         self.layers = layers
         self.image_size = image_size
         self.num_channels = num_channels
@@ -471,11 +478,13 @@ def _push_allocation_config(self):
                 continue
             if self.border_mode is not None:
                 layer.border_mode = self.border_mode
-            layer.tied_biases = self.tied_biases
+            if self.tied_biases is not None:
+                layer.tied_biases = self.tied_biases
             layer.image_size = image_size
             layer.num_channels = num_channels
             layer.batch_size = self.batch_size
-            layer.use_bias = self.use_bias
+            if getattr(self, 'use_bias', None) is not None:
+                layer.use_bias = self.use_bias

             # Push input dimensions to children
             layer.push_allocation_config()
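The net effect in conv.py: a ConvolutionalSequence only pushes `tied_biases` and `use_bias` to its children when those values were explicitly specified. A minimal sketch of the resulting behaviour (assuming the blocks.bricks.conv API as of this commit; this snippet is illustrative and not part of the diff):

from blocks.bricks import Rectifier
from blocks.bricks.conv import Convolutional, ConvolutionalSequence

# The child explicitly ties its biases; the sequence leaves tied_biases
# at its new default of None, so no value is pushed down.
conv = Convolutional(filter_size=(3, 3), num_filters=8, tied_biases=True)
seq = ConvolutionalSequence([conv, Rectifier()], num_channels=1,
                            image_size=(8, 8))
seq.push_allocation_config()
assert conv.tied_biases  # the child's explicit choice survives the push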
9 changes: 5 additions & 4 deletions blocks/bricks/interfaces.py
@@ -143,15 +143,16 @@ class Initializable(RNGMixin, Brick):
     has_biases = True

     @lazy()
-    def __init__(self, weights_init=None, biases_init=None, use_bias=True,
+    def __init__(self, weights_init=None, biases_init=None, use_bias=None,
                  seed=None, **kwargs):
         super(Initializable, self).__init__(**kwargs)
         self.weights_init = weights_init
         if self.has_biases:
             self.biases_init = biases_init
         elif biases_init is not None or not use_bias:
             raise ValueError("This brick does not support biases config")
-        self.use_bias = use_bias
+        if use_bias is not None:
+            self.use_bias = use_bias
         self.seed = seed

     def _push_initialization_config(self):
@@ -187,15 +188,15 @@ def W(self):

     @property
     def b(self):
-        if self.use_bias:
+        if getattr(self, 'use_bias', True):
             return self.parameters[1]
         else:
             raise AttributeError('use_bias is False')

     def _initialize(self):
         # Use self.parameters[] references in case W and b are overridden
         # to return non-shared-variables.
-        if self.use_bias:
+        if getattr(self, 'use_bias', True):
             self.biases_init.initialize(self.parameters[1], self.rng)
         self.weights_init.initialize(self.parameters[0], self.rng)
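The interfaces.py change makes `use_bias` tri-state: ``None`` means "no opinion", so the attribute is only bound when the caller was explicit, and readers fall back to the old default of ``True`` via ``getattr``. A self-contained sketch of the pattern in plain Python (illustrative only, not Blocks code):

class Parent(object):
    def __init__(self, use_bias=None):
        # Bind the attribute only on an explicit choice; otherwise it
        # stays unset, signalling "no opinion".
        if use_bias is not None:
            self.use_bias = use_bias

    def push(self, child):
        # Push only an explicitly specified value down the hierarchy.
        if getattr(self, 'use_bias', None) is not None:
            child.use_bias = self.use_bias


class Child(object):
    def __init__(self):
        self.use_bias = False  # the child's own explicit configuration


child = Child()
Parent().push(child)            # parent has no opinion ...
assert child.use_bias is False  # ... so the child's setting survives

Parent(use_bias=True).push(child)  # an explicit parent value ...
assert child.use_bias is True      # ... still overrides the child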
6 changes: 4 additions & 2 deletions blocks/bricks/sequences.py
@@ -97,7 +97,8 @@ class MLP(FeedforwardSequence, Initializable):
     -----
     See :class:`Initializable` for initialization parameters.

-    Note that the ``weights_init``, ``biases_init`` and ``use_bias``
+    Note that the ``weights_init``, ``biases_init`` (as well as
+    ``use_bias`` if set to a value other than the default of ``None``)
     configurations will overwrite those of the layers each time the
     :class:`MLP` is re-initialized. For more fine-grained control, push the
     configuration to the child layers manually before initialization.
@@ -160,4 +161,5 @@ def _push_allocation_config(self):
                                          self.linear_transformations):
             layer.input_dim = input_dim
             layer.output_dim = output_dim
-            layer.use_bias = self.use_bias
+            if getattr(self, 'use_bias', None) is not None:
+                layer.use_bias = self.use_bias
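For the fine-grained control mentioned in the MLP note, configuration can be set on individual children after the push; since an unspecified top-level ``use_bias`` is no longer propagated, a per-layer override now survives re-pushing. A hedged sketch (assuming the blocks.bricks MLP API as of this commit):

from blocks.bricks import MLP, Tanh

mlp = MLP(activations=[Tanh(), Tanh()], dims=[10, 10, 10])
mlp.push_allocation_config()
# No top-level use_bias was given, so this per-layer override is not
# overwritten if the allocation config is pushed again.
mlp.linear_transformations[0].use_bias = False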
4 changes: 2 additions & 2 deletions blocks/bricks/simple.py
@@ -49,7 +49,7 @@ def _allocate(self):
         add_role(W, WEIGHT)
         self.parameters.append(W)
         self.add_auxiliary_variable(W.norm(2), name='W_norm')
-        if self.use_bias:
+        if getattr(self, 'use_bias', True):
             b = shared_floatx_nans((self.output_dim,), name='b')
             add_role(b, BIAS)
             self.parameters.append(b)
@@ -71,7 +71,7 @@ def apply(self, input_):
         """
         output = tensor.dot(input_, self.W)
-        if self.use_bias:
+        if getattr(self, 'use_bias', True):
             output += self.b
         return output
15 changes: 15 additions & 0 deletions tests/bricks/test_bricks.py
@@ -372,6 +372,21 @@ class MyLinear(Linear):
                for i, lt in enumerate(mlp.linear_transformations))


+def test_mlp_use_bias_pushed_when_explicitly_specified():
+    mlp = MLP(activations=[Tanh(), Tanh(), None],
+              dims=[4, 5, 6, 7], prototype=Linear(use_bias=False),
+              use_bias=True)
+    mlp.push_allocation_config()
+    assert all(lin.use_bias for lin in mlp.linear_transformations)
+
+
+def test_mlp_use_bias_not_pushed_when_not_explicitly_specified():
+    mlp = MLP(activations=[Tanh(), Tanh(), None],
+              dims=[4, 5, 6, 7], prototype=Linear(use_bias=False))
+    mlp.push_allocation_config()
+    assert not any(lin.use_bias for lin in mlp.linear_transformations)
+
+
 def test_mlp_apply():
     x = tensor.matrix()
     x_val = numpy.random.rand(2, 16).astype(theano.config.floatX)
41 changes: 41 additions & 0 deletions tests/bricks/test_conv.py
@@ -331,3 +331,44 @@ def test_convolutional_sequence_use_bias():
     y = cnn.apply(x)
     params = ComputationGraph(y).parameters
     assert len(params) == 3 and all(param.name == 'W' for param in params)
+
+
+def test_convolutional_sequence_use_bias_not_pushed_if_not_explicitly_set():
+    cnn = ConvolutionalSequence(
+        sum([[Convolutional(filter_size=(1, 1), num_filters=1,
+                            use_bias=False), Rectifier()]
+             for _ in range(3)], []),
+        num_channels=1, image_size=(1, 1))
+    cnn.allocate()
+    assert all(not child.use_bias for child in cnn.children
+               if isinstance(child, Convolutional))
+
+
+def test_convolutional_sequence_tied_biases_not_pushed_if_not_explicitly_set():
+    cnn = ConvolutionalSequence(
+        sum([[Convolutional(filter_size=(1, 1), num_filters=1,
+                            tied_biases=True), Rectifier()]
+             for _ in range(3)], []),
+        num_channels=1, image_size=(1, 1))
+    cnn.allocate()
+    assert all(child.tied_biases for child in cnn.children
+               if isinstance(child, Convolutional))
+
+
+def test_convolutional_sequence_tied_biases_pushed_if_explicitly_set():
+    cnn = ConvolutionalSequence(
+        sum([[Convolutional(filter_size=(1, 1), num_filters=1,
+                            tied_biases=True), Rectifier()]
+             for _ in range(3)], []),
+        num_channels=1, image_size=(1, 1), tied_biases=False)
+    cnn.allocate()
+    assert all(not child.tied_biases for child in cnn.children
+               if isinstance(child, Convolutional))
+
+    cnn = ConvolutionalSequence(
+        sum([[Convolutional(filter_size=(1, 1), num_filters=1), Rectifier()]
+             for _ in range(3)], []),
+        num_channels=1, image_size=(1, 1), tied_biases=True)
+    cnn.allocate()
+    assert all(child.tied_biases for child in cnn.children
+               if isinstance(child, Convolutional))
