diff --git a/modules.py b/modules.py
index 2ff2385..4222d0a 100644
--- a/modules.py
+++ b/modules.py
@@ -118,49 +118,49 @@ def embedding(inputs,
 def positional_encoding(inputs,
-                        vocab_size,
-                        num_units,
-                        zero_pad = True,
-                        scale = True,
-                        scope = "positional_embedding",
-                        reuse = None):
-    '''
-    Positional_Encoding for a given tensor.
+                        num_units,
+                        zero_pad=True,
+                        scale=True,
+                        scope="positional_encoding",
+                        reuse=None):
+    '''Sinusoidal Positional_Encoding.
 
     Args:
-        inputs: [Tensor], A tensor contains the ids to be search from the lookup table, shape = [batch_size, 1 + len(inpt)]
-        vocab_size: [Int], Vocabulary size
-        num_units: [Int], Hidden size of embedding
-        zero_pad: [Boolean], If True, all the values of the first row(id = 0) should be constant zero
-        scale: [Boolean], If True, the output will be multiplied by sqrt num_units(check details from paper)
-        scope: [String], Optional scope for 'variable_scope'
-        reuse: [Boolean], If to reuse the weights of a previous layer by the same name
+      inputs: A 2d Tensor with shape (N, T).
+      num_units: Output dimensionality.
+      zero_pad: Boolean. If True, all the values of the first row (id = 0) should be constant zero.
+      scale: Boolean. If True, the output will be multiplied by sqrt(num_units) (see the paper for details).
+      scope: Optional scope for `variable_scope`.
+      reuse: Boolean, whether to reuse the weights of a previous layer
+        by the same name.
 
-    Returns:
+    Returns:
         A 'Tensor' with one more rank than inputs's, with the dimensionality should be 'num_units'
     '''
-    with tf.variable_scope(scope, reuse = reuse):
+    N, T = inputs.get_shape().as_list()
+    with tf.variable_scope(scope, reuse=reuse):
+        position_ind = tf.tile(tf.expand_dims(tf.range(T), 0), [N, 1])
-        input_one = tf.tile(tf.expand_dims(tf.range(tf.shape(inputs)[1]), 0), [tf.shape(inputs)[0], 1])
 
         # First part of the PE function: sin and cos argument
         position_enc = np.array([
-            [pos / np.power(10000, 2*i/num_units) for i in range(num_units)]
-            for pos in range(max_len)])
+            [pos / np.power(10000, 2.*i/num_units) for i in range(num_units)]
+            for pos in range(T)])
+
         # Second part, apply the cosine to even columns and sin to odds.
-        position_enc[:, 0::2] = np.sin(position_enc[1:, 0::2]) # dim 2i
-        position_enc[:, 1::2] = np.cos(position_enc[1:, 1::2]) # dim 2i+1
+        position_enc[:, 0::2] = np.sin(position_enc[:, 0::2])  # dim 2i
+        position_enc[:, 1::2] = np.cos(position_enc[:, 1::2])  # dim 2i+1
+
         # Convert to a tensor
         lookup_table = tf.convert_to_tensor(position_enc)
 
         if zero_pad:
+            lookup_table = tf.concat((tf.zeros(shape=[1, num_units]),
+                                      lookup_table[1:, :]), 0)
+
+        outputs = tf.nn.embedding_lookup(lookup_table, position_ind)
-            lookup_table = tf.concat((tf.zeros(shape = [1, num_units]),
-                                      lookup_table[1:, :]), 0)
-        outputs = tf.nn.embedding_lookup(lookup_table, input_one)
 
-    if scale:
-        outputs = outputs * math.sqrt(num_units)
+        if scale:
+            outputs = outputs * num_units**0.5
 
-    return outputs
+        return outputs
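
For reviewers who want to sanity-check the fixed row slicing outside TensorFlow, a minimal NumPy-only sketch of the same table construction follows (sinusoid_table and the test sizes are illustrative names chosen here, not part of modules.py):

import numpy as np

def sinusoid_table(T, num_units):
    # Angle arguments: pos / 10000^(2i/num_units); 2.*i keeps the division in floats.
    position_enc = np.array([
        [pos / np.power(10000, 2.*i/num_units) for i in range(num_units)]
        for pos in range(T)])
    # Sine on even columns, cosine on odd columns, over ALL rows. The old
    # position_enc[1:, 0::2] had only T-1 rows, so assigning it into the
    # T-row left-hand side raised a broadcast ValueError.
    position_enc[:, 0::2] = np.sin(position_enc[:, 0::2])  # dim 2i
    position_enc[:, 1::2] = np.cos(position_enc[:, 1::2])  # dim 2i+1
    return position_enc

table = sinusoid_table(T=10, num_units=8)
assert table.shape == (10, 8)
# Row 0 encodes position 0: sin(0) = 0 on even columns, cos(0) = 1 on odd ones.
assert np.allclose(table[0, 0::2], 0.0) and np.allclose(table[0, 1::2], 1.0)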
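
And a usage sketch of the patched function, assuming a TensorFlow 1.x environment (tf.placeholder / tf.variable_scope) with modules.py importable:

import tensorflow as tf
from modules import positional_encoding

# The patched code reads N and T via get_shape().as_list(), so the input
# needs a fully static shape; None dimensions would break tf.tile here.
x = tf.placeholder(tf.int32, shape=(32, 10))  # (N, T) token ids
pe = positional_encoding(x, num_units=512, zero_pad=False, scale=True)
print(pe.shape)  # (32, 10, 512)
# Caveat: position_enc is a float64 NumPy array, so pe comes out float64, and
# with zero_pad=True the float32 tf.zeros cannot concat with the float64
# table; converting with tf.convert_to_tensor(position_enc, tf.float32)
# would address both.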