From d7fc43c5657ff195b5c9ea6fa77670f89ba00099 Mon Sep 17 00:00:00 2001
From: Sharvil Nanavati
Date: Thu, 26 Mar 2020 00:15:14 -0700
Subject: [PATCH] Expose IndRNN layer via the public API (TensorFlow).

Issue: #7
---
 frameworks/tf/__init__.py |   2 +
 frameworks/tf/indrnn.py   | 189 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 191 insertions(+)
 create mode 100644 frameworks/tf/indrnn.py

diff --git a/frameworks/tf/__init__.py b/frameworks/tf/__init__.py
index f79bbb3..8645ba8 100644
--- a/frameworks/tf/__init__.py
+++ b/frameworks/tf/__init__.py
@@ -20,6 +20,7 @@
 from .gru import GRU
 from .gru_cell import GRUCell
+from .indrnn import IndRNN
 from .layer_norm import LayerNorm
 from .layer_norm_lstm import LayerNormLSTM
 from .layer_norm_lstm_cell import LayerNormLSTMCell
@@ -30,6 +31,7 @@
 __all__ = [
     'GRU',
     'GRUCell',
+    'IndRNN',
     'LayerNorm',
     'LayerNormLSTM',
     'LayerNormLSTMCell',

diff --git a/frameworks/tf/indrnn.py b/frameworks/tf/indrnn.py
new file mode 100644
index 0000000..d1ce9e8
--- /dev/null
+++ b/frameworks/tf/indrnn.py
@@ -0,0 +1,189 @@
+# Copyright 2020 LMNT, Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Independently Recurrent Neural Network"""
+
+
+import pkg_resources
+import tensorflow as tf
+
+from tensorflow.compat import v1
+
+from .base_rnn import BaseRNN
+
+
+__all__ = [
+    'IndRNN'
+]
+
+
+LIB = tf.load_op_library(pkg_resources.resource_filename(__name__, 'libhaste_tf.so'))
+
+
+@tf.RegisterGradient("HasteIndrnn")
+def indrnn_gradient(op, *grads):
+  training = op.get_attr('training')
+  if not training:
+    raise ValueError(('IndRNN can only compute gradients if `training=True` was specified during '
+                      'the forward pass.\nFailed op: {}').format(op.name))
+
+  # Extract inputs and outputs from the op.
+  x = op.inputs[0]
+  W = op.inputs[1]
+  u = op.inputs[2]
+  b = op.inputs[3]
+  zoneout_mask = op.inputs[4]
+  h = op.outputs[0]
+
+  # Pre-transpose matrices for better performance.
+  x = tf.transpose(x, [0, 2, 1])
+  W = tf.transpose(W, [1, 0])
+
+  dx, dW, du, db = LIB.haste_indrnn_grad(x, W, u, b, zoneout_mask, h, grads[0])
+  return [dx, dW, du, db, None]
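+
+
+# For reference, the recurrence the fused op computes is, informally,
+#
+#   h[t] = act(x[t] @ W + u * h[t-1] + b)
+#
+# where `u` is a per-unit recurrent scale applied elementwise rather than a
+# full recurrent matrix; this is what makes the units "independent". The
+# nonlinearity is applied inside the fused kernel, not in this Python wrapper.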
+
+
+def _get_initializer(initializer, default):
+  if not isinstance(initializer, dict):
+    return initializer or default
+  if 'uniform' in initializer:
+    value = initializer['uniform']
+    return v1.initializers.random_uniform(-value, value)
+  if 'normal' in initializer:
+    value = initializer['normal']
+    return v1.initializers.truncated_normal(stddev=value)
+  raise ValueError(f'Unknown initializer {initializer}')
+
+
+class IndRNNLayer(tf.Module):
+  def __init__(self,
+               num_units,
+               kernel_initializer=None,
+               recurrent_initializer=None,
+               bias_initializer=None,
+               zoneout=0.0,
+               dtype=None,
+               name=None):
+    super().__init__(name)
+    self.realname = name
+    self.num_units = num_units
+
+    self.kernel_initializer = \
+        _get_initializer(kernel_initializer, v1.initializers.glorot_uniform())
+    self.recurrent_initializer = \
+        _get_initializer(recurrent_initializer, v1.initializers.random_uniform(-0.5, 0.5))
+    self.bias_initializer = \
+        _get_initializer(bias_initializer, v1.initializers.zeros())
+
+    self.zoneout = zoneout
+    self.dtype = dtype or tf.float32
+    self.kernel = None
+    self.recurrent_scale = None
+    self.bias = None
+    self.built = False
+
+  def build(self, shape):
+    if self.built:
+      return
+
+    num_units = self.num_units
+    input_size = int(shape[-1])
+
+    kernel_shape = tf.TensorShape([input_size, num_units])
+    recurrent_shape = tf.TensorShape([num_units])
+    bias_shape = tf.TensorShape([num_units])
+
+    kernel_weights = self.kernel_initializer(kernel_shape, dtype=self.dtype)
+    recurrent_weights = self.recurrent_initializer(recurrent_shape, dtype=self.dtype)
+    biases = self.bias_initializer(bias_shape, dtype=self.dtype)
+
+    with self.name_scope, v1.variable_scope(self.realname, 'indrnn_cell'):
+      self.kernel = v1.get_variable('kernel', initializer=kernel_weights)
+      self.recurrent_scale = v1.get_variable('recurrent_scale', initializer=recurrent_weights)
+      self.bias = v1.get_variable('bias', initializer=biases)
+
+    self.built = True
+
+  def __call__(self, inputs, sequence_length, training):
+    self.build(inputs.shape)
+
+    shape = tf.shape(inputs)
+    time_steps = shape[0]
+    batch_size = shape[1]
+
+    # Use an empty zoneout mask if no zoneout is going to be applied.
+    # Sadly, we can't pass `None` to the op, but at least we won't be wasting
+    # memory or bandwidth on this tensor.
+    zoneout_mask = tf.zeros([0, 0, 0], dtype=self.dtype)
+    if self.zoneout:
+      zoneout_mask = 1.0 - self.zoneout
+      zoneout_mask += tf.random.uniform([time_steps, batch_size, self.num_units], dtype=self.dtype)
+      zoneout_mask = tf.floor(zoneout_mask)
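+      # The mask built above is floor(U + (1 - zoneout)) with U ~ Uniform[0, 1):
+      # each entry is an independent 0/1 draw that is 1 with probability
+      # `1 - zoneout`. The fused kernel consumes this mask (together with
+      # `zoneout_prob` below) to decide which units are zoned out at each step.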
+
+    result = LIB.haste_indrnn(
+        inputs,
+        self.kernel,
+        self.recurrent_scale,
+        self.bias,
+        zoneout_mask,
+        training=training,
+        zoneout_prob=self.zoneout)
+
+    if sequence_length is not None:
+      # `result` includes the initial state at index 0, so for a sequence of
+      # length L the final state lives at index L (not L-1).
+      indices = sequence_length
+      indices = tf.stack([indices, tf.range(batch_size, dtype=sequence_length.dtype)], axis=-1)
+      state = tf.gather_nd(result, indices)
+    else:
+      state = result[-1]
+
+    return result[1:], state
+
+
+class IndRNN(BaseRNN):
+  """
+  Independently Recurrent Neural Network layer.
+
+  This layer offers a fused, GPU-accelerated TensorFlow op for inference
+  and training. It also supports Zoneout regularization.
+  """
+
+  def __init__(self, num_units, direction='unidirectional', **kwargs):
+    """
+    Initialize the parameters of the IndRNN layer.
+
+    Arguments:
+      num_units: int, the number of units in the IndRNN cell.
+      direction: string, 'unidirectional' or 'bidirectional'.
+      **kwargs: Dict, keyword arguments (see below).
+
+    Keyword Arguments:
+      kernel_initializer: (optional) the initializer to use for the input
+        matrix weights. Defaults to `glorot_uniform`.
+      recurrent_initializer: (optional) the initializer to use for the
+        recurrent scale weights. Defaults to uniform random in [-0.5, 0.5].
+        Note that this initialization scheme is different from the one used
+        in the original authors' implementation. See
+        https://github.com/lmnt-com/haste/issues/7 for details.
+      bias_initializer: (optional) the initializer to use for the bias vector.
+        Defaults to `zeros`.
+      zoneout: (optional) float, sets the zoneout rate for Zoneout
+        regularization. Defaults to 0.
+      dtype: (optional) the data type for this layer. Defaults to `tf.float32`.
+      name: (optional) string, the name for this layer.
+    """
+    super().__init__(IndRNNLayer, num_units, direction, 'indrnn_cell', **kwargs)
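
For completeness, a minimal usage sketch of the new layer (not part of the diff). It assumes the package is importable as `haste_tf`, as in the project README, that `BaseRNN` exposes the `(inputs, training=..., sequence_length=...)` call signature used by the other haste TensorFlow layers, and that inputs follow the time-major `[time_steps, batch_size, input_size]` layout the op expects:

    import tensorflow as tf
    import haste_tf as haste  # assumed import name, per the project README

    # Time-major input: [time_steps, batch_size, input_size].
    x = tf.random.normal([250, 32, 128])

    # Zoneout is optional; 0.0 (the default) disables it.
    indrnn = haste.IndRNN(num_units=256, direction='unidirectional', zoneout=0.1)

    # `y` holds the per-step outputs, `state` the final hidden state.
    y, state = indrnn(x, training=True)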