Skip to content

Commit

Permalink
feat(fuzzer): Add functions that make multi-join plans with values nodes
Browse files Browse the repository at this point in the history
Summary:
Generates a cascading multi-join from left to right with the inputs to the join being values nodes.

For example, given the inputs [t1, t2, t3, t4], the generated plan is:
```
t1  t2
 \  /
  a   t3
   \  /
     b   t4
      \  /
        c
```

Differential Revision: D68797355
  • Loading branch information
Daniel Hunte authored and facebook-github-bot committed Jan 28, 2025
1 parent ce273fa commit d3f0e8d
Showing 1 changed file with 94 additions and 0 deletions.
94 changes: 94 additions & 0 deletions velox/exec/fuzzer/JoinFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,17 @@ class JoinFuzzer {
numGroups(_numGroups) {}
};

// Describes one join step of a cascading multi-join plan: the join settings
// plus the data for the build side of that step. A list of these is consumed
// by the multi-join overloads of makeDefaultPlan, makeMergeJoinPlan and
// makeNestedLoopJoinPlan, which add one join node per entry.
//
// Scalar members carry default initializers so that a default-constructed
// JoinData never holds indeterminate values; the struct remains an aggregate.
struct JoinData {
  // Type of the join added for this step.
  core::JoinType joinType{core::JoinType::kInner};
  // Forwarded to the hash join only; the merge join and nested loop join
  // builders do not read it.
  bool nullAware{false};
  // Join key column names on the probe side (the plan built so far).
  std::vector<std::string> probeKeys;
  // Join key column names on the build side.
  std::vector<std::string> buildKeys;
  // Plan node that seeds the probe side. The multi-join builders only read
  // this from the first entry of the list.
  core::PlanNodePtr probeInput;
  // Vectors wrapped in a Values node to form the build side of this step.
  std::vector<RowVectorPtr> buildInput;
  // Columns produced by the join node of this step.
  std::vector<std::string> outputColumns;
  // Join filter expression passed to the join node as-is.
  std::string filter;
};

static core::PlanNodePtr tryFlipJoinSides(const core::HashJoinNode& joinNode);
static core::PlanNodePtr tryFlipJoinSides(
const core::MergeJoinNode& joinNode);
Expand Down Expand Up @@ -160,6 +171,13 @@ class JoinFuzzer {
const std::vector<std::string>& outputColumns,
const std::string& filter);

// Constructs a cascading multi-join plan with hash join nodes: one hash join
// is added per entry of 'joinDataList', in order, and the output of each join
// is the probe side of the next one.
//
// 'joinDataList' must not be empty. joinDataList[0].probeInput should be a
// single values node made using the same planNodeIdGenerator; it seeds the
// probe side of the first join. Each entry's buildInput is wrapped in a new
// Values node that becomes the build side of that entry's join.
JoinFuzzer::PlanWithSplits makeDefaultPlan(
std::shared_ptr<core::PlanNodeIdGenerator> planNodeIdGenerator,
const std::vector<JoinData>& joinDataList);

JoinFuzzer::PlanWithSplits makeMergeJoinPlan(
core::JoinType joinType,
const std::vector<std::string>& probeKeys,
Expand All @@ -169,6 +187,13 @@ class JoinFuzzer {
const std::vector<std::string>& outputColumns,
const std::string& filter);

// Constructs a cascading multi-join plan with merge join nodes: one merge
// join is added per entry of 'joinDataList', in order, and the output of each
// join is the probe side of the next one. Both sides of every join are sorted
// on their respective join keys before being joined.
//
// 'joinDataList' must not be empty. joinDataList[0].probeInput should be a
// single values node made using the same planNodeIdGenerator; it seeds the
// probe side of the first join. Each entry's buildInput is wrapped in a new
// Values node that becomes the build side of that entry's join.
JoinFuzzer::PlanWithSplits makeMergeJoinPlan(
std::shared_ptr<core::PlanNodeIdGenerator> planNodeIdGenerator,
const std::vector<JoinData>& joinDataList);

// Returns a PlanWithSplits for NestedLoopJoin with inputs from Values nodes.
// If withFilter is true, uses the equality filter between probeKeys and
// buildKeys as the join filter. Uses empty join filter otherwise.
Expand All @@ -181,6 +206,13 @@ class JoinFuzzer {
const std::vector<std::string>& outputColumns,
const std::string& filter);

// Constructs a cascading multi-join plan with nested loop join nodes: one
// nested loop join is added per entry of 'joinDataList', in order, and the
// output of each join is the left side of the next one. The join keys are not
// passed to the join node itself; only the entry's filter, output columns and
// join type are.
//
// 'joinDataList' must not be empty. joinDataList[0].probeInput should be a
// single values node made using the same planNodeIdGenerator; it seeds the
// left side of the first join. Each entry's buildInput is wrapped in a new
// Values node that becomes the right side of that entry's join.
JoinFuzzer::PlanWithSplits makeNestedLoopJoinPlan(
std::shared_ptr<core::PlanNodeIdGenerator> planNodeIdGenerator,
const std::vector<JoinData>& joinDataList);

// Makes the default query plan with table scan as inputs for both probe and
// build sides.
JoinFuzzer::PlanWithSplits makeDefaultPlanWithTableScan(
Expand Down Expand Up @@ -762,6 +794,26 @@ JoinFuzzer::PlanWithSplits JoinFuzzer::makeDefaultPlan(
return PlanWithSplits{plan};
}

// Builds a cascading hash-join plan with Values nodes as build inputs.
//
// The plan is seeded with joinDataList[0].probeInput. Each entry of
// 'joinDataList' then adds one hash join on top of the plan built so far, so
// the output of one join is the probe side of the next. The probeInput of
// later entries is not read.
//
// Defined as a JoinFuzzer member so that it matches the declaration in the
// class; without the qualifier this would be an unrelated free function and
// the declared member would have no definition.
//
// Fails the VELOX_CHECK if 'joinDataList' is empty.
JoinFuzzer::PlanWithSplits JoinFuzzer::makeDefaultPlan(
    std::shared_ptr<core::PlanNodeIdGenerator> planNodeIdGenerator,
    const std::vector<JoinFuzzer::JoinData>& joinDataList) {
  VELOX_CHECK_GT(joinDataList.size(), 0);
  PlanBuilder plan = PlanBuilder(
      /*initialPlanNode=*/joinDataList[0].probeInput, planNodeIdGenerator);
  for (const JoinFuzzer::JoinData& joinData : joinDataList) {
    // The build side shares the id generator with the probe side so plan node
    // ids stay unique across the whole plan.
    plan.hashJoin(
        joinData.probeKeys,
        joinData.buildKeys,
        /*build=*/
        PlanBuilder(planNodeIdGenerator).values(joinData.buildInput).planNode(),
        joinData.filter,
        joinData.outputColumns,
        joinData.joinType,
        joinData.nullAware);
  }
  return JoinFuzzer::PlanWithSplits{plan.planNode()};
}

JoinFuzzer::PlanWithSplits JoinFuzzer::makeDefaultPlanWithTableScan(
core::JoinType joinType,
bool nullAware,
Expand Down Expand Up @@ -896,6 +948,29 @@ JoinFuzzer::PlanWithSplits JoinFuzzer::makeMergeJoinPlan(
.planNode()};
}

// Builds a cascading merge-join plan with Values nodes as build inputs.
//
// The plan is seeded with joinDataList[0].probeInput. Each entry of
// 'joinDataList' then sorts the plan built so far on the entry's probe keys
// and merge-joins it with the entry's build input, which is sorted on the
// build keys. The probeInput of later entries is not read, and 'nullAware' is
// not used by this builder.
//
// Defined as a JoinFuzzer member so that it matches the declaration in the
// class; without the qualifier this would be an unrelated free function and
// the declared member would have no definition.
//
// Fails the VELOX_CHECK if 'joinDataList' is empty.
JoinFuzzer::PlanWithSplits JoinFuzzer::makeMergeJoinPlan(
    std::shared_ptr<core::PlanNodeIdGenerator> planNodeIdGenerator,
    const std::vector<JoinFuzzer::JoinData>& joinDataList) {
  VELOX_CHECK_GT(joinDataList.size(), 0);
  PlanBuilder plan = PlanBuilder(
      /*initialPlanNode=*/joinDataList[0].probeInput, planNodeIdGenerator);
  for (const JoinFuzzer::JoinData& joinData : joinDataList) {
    // Both sides are ordered on their join keys before the merge join. The
    // probe side is re-sorted at every step because each step may use
    // different probe keys.
    // NOTE(review): the 'false' argument is presumably isPartial (i.e. a full
    // sort) - confirm against PlanBuilder::orderBy.
    plan.orderBy(joinData.probeKeys, false)
        .mergeJoin(
            joinData.probeKeys,
            joinData.buildKeys,
            /*build=*/
            PlanBuilder(planNodeIdGenerator)
                .values(joinData.buildInput)
                .orderBy(joinData.buildKeys, false)
                .planNode(),
            joinData.filter,
            joinData.outputColumns,
            joinData.joinType);
  }
  return JoinFuzzer::PlanWithSplits{plan.planNode()};
}

JoinFuzzer::PlanWithSplits JoinFuzzer::makeNestedLoopJoinPlan(
core::JoinType joinType,
const std::vector<std::string>& probeKeys,
Expand All @@ -916,6 +991,25 @@ JoinFuzzer::PlanWithSplits JoinFuzzer::makeNestedLoopJoinPlan(
.planNode()};
}

// Builds a cascading nested-loop-join plan with Values nodes as right inputs.
//
// The plan is seeded with joinDataList[0].probeInput. Each entry of
// 'joinDataList' then adds one nested loop join on top of the plan built so
// far, using the entry's filter, output columns and join type. The probeInput
// of later entries is not read; 'buildKeys' and 'nullAware' are not used by
// this builder.
//
// Defined as a JoinFuzzer member so that it matches the declaration in the
// class; without the qualifier this would be an unrelated free function and
// the declared member would have no definition.
//
// Fails the VELOX_CHECK if 'joinDataList' is empty.
JoinFuzzer::PlanWithSplits JoinFuzzer::makeNestedLoopJoinPlan(
    std::shared_ptr<core::PlanNodeIdGenerator> planNodeIdGenerator,
    const std::vector<JoinFuzzer::JoinData>& joinDataList) {
  VELOX_CHECK_GT(joinDataList.size(), 0);
  PlanBuilder plan = PlanBuilder(
      /*initialPlanNode=*/joinDataList[0].probeInput, planNodeIdGenerator);
  for (const JoinFuzzer::JoinData& joinData : joinDataList) {
    // NOTE(review): the probe-side sort mirrors the merge-join variant; a
    // nested loop join does not appear to need ordered input. Kept to preserve
    // the existing plan shape - confirm whether it can be dropped.
    plan.orderBy(joinData.probeKeys, false)
        .nestedLoopJoin(
            /*right=*/PlanBuilder(planNodeIdGenerator)
                .values(joinData.buildInput)
                .planNode(),
            joinData.filter,
            joinData.outputColumns,
            joinData.joinType);
  }
  return JoinFuzzer::PlanWithSplits{plan.planNode()};
}

void JoinFuzzer::makeAlternativePlans(
const core::PlanNodePtr& plan,
const std::vector<RowVectorPtr>& probeInput,
Expand Down

0 comments on commit d3f0e8d

Please sign in to comment.