Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
address comments
Browse files Browse the repository at this point in the history
zhli1142015 committed Jan 28, 2025
1 parent 607eb82 commit dc0d2a2
Showing 5 changed files with 46 additions and 22 deletions.
15 changes: 14 additions & 1 deletion velox/exec/HashProbe.cpp
Original file line number Diff line number Diff line change
@@ -1373,11 +1373,24 @@ SelectivityVector HashProbe::evalFilterForNullAwareJoin(
}

if (buildSideHasNullKeys_) {
if (nullKeyProbeHashers_.empty()) {
nullKeyProbeHashers_ =
createVectorHashers(probeType_, joinNode_->leftKeys());
VELOX_CHECK_EQ(nullKeyProbeHashers_.size(), 1);
if (table_->hashMode() == BaseHashTable::HashMode::kHash) {
nullKeyProbeInput_ =
BaseVector::create(nullKeyProbeHashers_[0]->type(), 1, pool());
nullKeyProbeInput_->setNull(0, true);
SelectivityVector selectivity(1);
nullKeyProbeHashers_[0]->decode(*nullKeyProbeInput_, selectivity);
}
}
BaseHashTable::NullKeyRowsIterator iter;
nullKeyProbeRows.deselect(filterPassedRows);
applyFilterOnTableRowsForNullAwareJoin(
nullKeyProbeRows, filterPassedRows, [&](char** data, int32_t maxRows) {
return table_->listNullKeyRows(&iter, maxRows, data);
return table_->listNullKeyRows(
&iter, maxRows, data, nullKeyProbeHashers_);
});
}
BaseHashTable::RowsIterator iter;
6 changes: 6 additions & 0 deletions velox/exec/HashProbe.h
Original file line number Diff line number Diff line change
@@ -680,6 +680,12 @@ class HashProbe : public Operator {

// The spilled probe partitions remaining to restore.
SpillPartitionSet inputSpillPartitionSet_;

// VectorHashers used for listing rows with null keys.
std::vector<std::unique_ptr<VectorHasher>> nullKeyProbeHashers_;

// Input vector used for listing rows with null keys.
VectorPtr nullKeyProbeInput_;
};

inline std::ostream& operator<<(std::ostream& os, ProbeOperatorState state) {
19 changes: 8 additions & 11 deletions velox/exec/HashTable.cpp
Original file line number Diff line number Diff line change
@@ -350,11 +350,6 @@ bool HashTable<ignoreNullKeys>::compareKeys(
const char* group,
HashLookup& lookup,
vector_size_t row) {
// If `probeForNullKeyOnly` is set in the lookup object, the function will
// skip the key comparison and return true immediately.
if (lookup.probeForNullKeyOnly) {
return true;
}
int32_t numKeys = lookup.hashers.size();
// The loop runs at least once. Allow for first comparison to fail
// before loop end check.
@@ -2025,12 +2020,11 @@ template <>
int32_t HashTable<false>::listNullKeyRows(
NullKeyRowsIterator* iter,
int32_t maxRows,
char** rows) {
char** rows,
const std::vector<std::unique_ptr<VectorHasher>>& hashers) {
if (!iter->initialized) {
VELOX_CHECK_GT(nextOffset_, 0);
VELOX_CHECK_EQ(hashers_.size(), 1);
HashLookup lookup(hashers_);
lookup.probeForNullKeyOnly = true;
HashLookup lookup(hashers);
if (hashMode_ == HashMode::kHash) {
lookup.hashes.push_back(VectorHasher::kNullHash);
} else {
@@ -2068,8 +2062,11 @@ int32_t HashTable<false>::listNullKeyRows(
}

template <>
int32_t
HashTable<true>::listNullKeyRows(NullKeyRowsIterator*, int32_t, char**) {
int32_t HashTable<true>::listNullKeyRows(
NullKeyRowsIterator*,
int32_t,
char**,
const std::vector<std::unique_ptr<VectorHasher>>&) {
VELOX_UNREACHABLE();
}

15 changes: 7 additions & 8 deletions velox/exec/HashTable.h
Original file line number Diff line number Diff line change
@@ -95,11 +95,6 @@ struct HashLookup {
/// If using valueIds, list of concatenated valueIds. 1:1 with 'hashes'.
/// Populated by groupProbe and joinProbe.
raw_vector<uint64_t> normalizedKeys;

/// If true, the probe will only consider rows with null keys.
/// During probing, only the hash value is compared, and key comparison is
/// skipped to avoid potential issues. This is used by listNullKeyRows.
bool probeForNullKeyOnly{false};
};

struct HashTableStats {
@@ -284,8 +279,11 @@ class BaseHashTable {

/// Returns all rows with null keys. Used by null-aware joins (e.g. anti or
/// left semi project).
virtual int32_t
listNullKeyRows(NullKeyRowsIterator* iter, int32_t maxRows, char** rows) = 0;
virtual int32_t listNullKeyRows(
NullKeyRowsIterator* iter,
int32_t maxRows,
char** rows,
const std::vector<std::unique_ptr<VectorHasher>>& hashers) = 0;

virtual void prepareJoinTable(
std::vector<std::unique_ptr<BaseHashTable>> tables,
@@ -536,7 +534,8 @@ class HashTable : public BaseHashTable {
int32_t listNullKeyRows(
NullKeyRowsIterator* iter,
int32_t maxRows,
char** rows) override;
char** rows,
const std::vector<std::unique_ptr<VectorHasher>>& hashers) override;

void clear(bool freeTable) override;

13 changes: 11 additions & 2 deletions velox/exec/tests/HashTableTest.cpp
Original file line number Diff line number Diff line change
@@ -547,7 +547,14 @@ class HashTableTest : public testing::TestWithParam<bool>,
ASSERT_EQ(table->hashMode(), mode);
std::vector<char*> rows(nullValues.size());
BaseHashTable::NullKeyRowsIterator iter;
auto numRows = table->listNullKeyRows(&iter, rows.size(), rows.data());
std::vector<std::unique_ptr<VectorHasher>> probeHashers;
probeHashers.push_back(std::make_unique<VectorHasher>(keys->type(), 0));
auto nullKeyProbeInput = BaseVector::create(keys->type(), 1, pool());
nullKeyProbeInput->setNull(0, true);
SelectivityVector selectivity(1);
probeHashers[0]->decode(*nullKeyProbeInput, selectivity);
auto numRows =
table->listNullKeyRows(&iter, rows.size(), rows.data(), probeHashers);
ASSERT_EQ(numRows, nullValues.size());
auto actual =
BaseVector::create<FlatVector<int64_t>>(BIGINT(), numRows, pool());
@@ -558,7 +565,9 @@ class HashTableTest : public testing::TestWithParam<bool>,
nullValues.erase(it);
}
ASSERT_TRUE(nullValues.empty());
ASSERT_EQ(0, table->listNullKeyRows(&iter, rows.size(), rows.data()));
ASSERT_EQ(
0,
table->listNullKeyRows(&iter, rows.size(), rows.data(), probeHashers));
}

// Bitmap of positions in batches_ that end up in the table.

0 comments on commit dc0d2a2

Please sign in to comment.