From b305afa1ab8c123449361916f94a16040636c924 Mon Sep 17 00:00:00 2001 From: Adam Lesnikowski Date: Thu, 30 Sep 2021 19:13:51 +0000 Subject: [PATCH] Update tensorflow dataloader implementation Update tensorflow dataloader implementation for speed optimization. This implements a suggested revision by @jperez999 for issue #1077. --- nvtabular/loader/tensorflow.py | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/nvtabular/loader/tensorflow.py b/nvtabular/loader/tensorflow.py index b74629883a2..d7fd2f296e7 100644 --- a/nvtabular/loader/tensorflow.py +++ b/nvtabular/loader/tensorflow.py @@ -445,23 +445,10 @@ def _build_sparse_tensor(self, values, offsets, diff_offsets, num_rows, seq_limi """ ragged = tf.RaggedTensor.from_row_lengths(values=values, row_lengths=diff_offsets) - # Get vector of padding lengths using tf ops like reduce_sum. - non_zero_entries_by_row = tf.math.reduce_sum(ragged / ragged, axis=1) - paddings = seq_limit - non_zero_entries_by_row.numpy() - - # Make zeros ragged tensor to pad our data tensor with. - total_entries = ragged.shape[0] * seq_limit - non_zero_entries = tf.reduce_sum(ragged / ragged).numpy() - zeros_count = total_entries - non_zero_entries - zeros_values = tf.zeros(shape=(int(zeros_count)), dtype=tf.dtypes.int64) - zeros = tf.RaggedTensor.from_row_lengths(values=zeros_values, row_lengths=paddings) - - # Concatenate zeros ragged tensor with our data tensor on either the left or the right, - # depending on either left_pad or not. - if self.pad_left: - tensor = tf.concat([zeros, ragged], axis=1).to_tensor() - else: - tensor = tf.concat([ragged, zeros], axis=1).to_tensor() + reverse = tf.reverse(ragged, [-1]).to_tensor(0) + tensor = tf.reverse(reverse, [-1]) + paddings = tf.constant([[0, 0], [seq_limit - tensor.shape[1], 0]]) + tensor = tf.pad(tensor, paddings) tensor = tf.RaggedTensor.from_tensor(tensor).to_sparse() if self.sparse_as_dense: