Skip to content

Commit

Permalink
Update tensorflow dataloader implementation
Browse files Browse the repository at this point in the history
Update the TensorFlow dataloader implementation to improve speed. This
implements a revision suggested by @jperez999 for issue #1077.
  • Loading branch information
Adam Lesnikowski committed Sep 30, 2021
1 parent a51aa44 commit b305afa
Showing 1 changed file with 4 additions and 17 deletions.
21 changes: 4 additions & 17 deletions nvtabular/loader/tensorflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,23 +445,10 @@ def _build_sparse_tensor(self, values, offsets, diff_offsets, num_rows, seq_limi
"""
ragged = tf.RaggedTensor.from_row_lengths(values=values, row_lengths=diff_offsets)

# Get vector of padding lengths using tf ops like reduce_sum.
non_zero_entries_by_row = tf.math.reduce_sum(ragged / ragged, axis=1)
paddings = seq_limit - non_zero_entries_by_row.numpy()

# Make zeros ragged tensor to pad our data tensor with.
total_entries = ragged.shape[0] * seq_limit
non_zero_entries = tf.reduce_sum(ragged / ragged).numpy()
zeros_count = total_entries - non_zero_entries
zeros_values = tf.zeros(shape=(int(zeros_count)), dtype=tf.dtypes.int64)
zeros = tf.RaggedTensor.from_row_lengths(values=zeros_values, row_lengths=paddings)

# Concatenate the zeros ragged tensor with our data tensor on the left or on the right,
# depending on whether self.pad_left is set.
if self.pad_left:
tensor = tf.concat([zeros, ragged], axis=1).to_tensor()
else:
tensor = tf.concat([ragged, zeros], axis=1).to_tensor()
reverse = tf.reverse(ragged, [-1]).to_tensor(0)
tensor = tf.reverse(reverse, [-1])
paddings = tf.constant([[0, 0], [seq_limit - tensor.shape[1], 0]])
tensor = tf.pad(tensor, paddings)

tensor = tf.RaggedTensor.from_tensor(tensor).to_sparse()
if self.sparse_as_dense:
Expand Down

0 comments on commit b305afa

Please sign in to comment.