diff --git a/velox/exec/fuzzer/JoinFuzzer.cpp b/velox/exec/fuzzer/JoinFuzzer.cpp
index 5218a9ff4db1e..3b24f6580cbc2 100644
--- a/velox/exec/fuzzer/JoinFuzzer.cpp
+++ b/velox/exec/fuzzer/JoinFuzzer.cpp
@@ -130,6 +130,18 @@ class JoinFuzzer {
   // Randomly pick a join type to test.
   core::JoinType pickJoinType();
 
+  // Returns an equality join filter between probeKeys and buildKeys, joined
+  // with AND. 10% of the time, provided both probeInput and buildInput are
+  // non-empty, also appends a semi filter of the form '<column> = true'. The
+  // column is the first boolean column of the probe side's row type 50% of the
+  // time and of the build side's row type otherwise; if that side has no
+  // boolean column, no semi filter is added.
+  std::string makeJoinFilter(
+      const std::vector& probeKeys,
+      const std::vector& buildKeys,
+      const std::vector& probeInput,
+      const std::vector& buildInput);
+
   // Makes the query plan with default settings in JoinFuzzer and value inputs
   // for both probe and build sides.
   //
@@ -380,6 +392,45 @@ core::JoinType JoinFuzzer::pickJoinType() {
   return kJoinTypes[idx];
 }
 
+std::string JoinFuzzer::makeJoinFilter(
+    const std::vector& probeKeys,
+    const std::vector& buildKeys,
+    const std::vector& probeInput,
+    const std::vector& buildInput) {
+  const auto numKeys = probeKeys.size();
+  std::string filter;
+  VELOX_CHECK_EQ(numKeys, buildKeys.size());
+  for (size_t i = 0; i < numKeys; ++i) {
+    if (i > 0) {
+      filter += " AND ";
+    }
+    filter += fmt::format("{} = {}", probeKeys[i], buildKeys[i]);
+  }
+  // Add a semi filter 10% of the time. Both inputs must be non-empty because
+  // the candidate column comes from the type of a side's first vector.
+  if (vectorFuzzer_.coinToss(0.1) && !probeInput.empty() &&
+      !buildInput.empty()) {
+    RowTypePtr rowType = vectorFuzzer_.coinToss(0.5)
+        ? asRowType(probeInput[0]->type())
+        : asRowType(buildInput[0]->type());
+
+    for (int i = 0; i < rowType->size(); i++) {
+      // TODO: Add support for non-boolean types.
+      if (rowType->childAt(i)->isBoolean()) {
+        // Only emit the connective when an equality clause precedes it;
+        // otherwise the filter would start with a dangling AND.
+        if (!filter.empty()) {
+          filter += " AND ";
+        }
+        filter += fmt::format("{} = true", rowType->nameOf(i));
+        break;
+      }
+    }
+  }
+  LOG(INFO) << "Join filter: " << filter;
+  return filter;
+}
+
 std::vector JoinFuzzer::generateJoinKeyTypes(int32_t numKeys) {
   std::vector types;
   types.reserve(numKeys);
@@ -690,6 +741,8 @@ JoinFuzzer::PlanWithSplits JoinFuzzer::makeDefaultPlan(
     const std::vector& buildInput,
     const std::vector& outputColumns) {
   auto planNodeIdGenerator = std::make_shared();
+  const std::string filter =
+      makeJoinFilter(probeKeys, buildKeys, probeInput, buildInput);
   auto plan =
       PlanBuilder(planNodeIdGenerator)
           .values(probeInput)
@@ -773,22 +826,6 @@ std::vector makeSources(
   return sourceNodes;
 }
 
-// Returns an equality join filter between probeKeys and buildKeys.
-std::string makeJoinFilter(
-    const std::vector& probeKeys,
-    const std::vector& buildKeys) {
-  const auto numKeys = probeKeys.size();
-  std::string filter;
-  VELOX_CHECK_EQ(numKeys, buildKeys.size());
-  for (auto i = 0; i < numKeys; ++i) {
-    if (i > 0) {
-      filter += " AND ";
-    }
-    filter += fmt::format("{} = {}", probeKeys[i], buildKeys[i]);
-  }
-  return filter;
-}
-
 template
 void addFlippedJoinPlan(
     const core::PlanNodePtr& plan,
@@ -846,8 +883,9 @@ JoinFuzzer::PlanWithSplits JoinFuzzer::makeNestedLoopJoinPlan(
     const std::vector& outputColumns,
     bool withFilter) {
   auto planNodeIdGenerator = std::make_shared();
-  const std::string filter =
-      withFilter ? makeJoinFilter(probeKeys, buildKeys) : "";
+  const std::string filter = withFilter
+      ? makeJoinFilter(probeKeys, buildKeys, probeInput, buildInput)
+      : "";
   return JoinFuzzer::PlanWithSplits{
       PlanBuilder(planNodeIdGenerator)
           .values(probeInput)
@@ -1232,7 +1270,7 @@ JoinFuzzer::PlanWithSplits JoinFuzzer::makeNestedLoopJoinPlanWithTableScan(
   core::PlanNodeId buildScanId;
 
   const std::string filter =
-      withFilter ? makeJoinFilter(probeKeys, buildKeys) : "";
+      withFilter ? makeJoinFilter(probeKeys, buildKeys, {}, {}) : "";
   return JoinFuzzer::PlanWithSplits{
       PlanBuilder(planNodeIdGenerator)
           .tableScan(probeType)