Skip to content

Commit

Permalink
fix(fuzzer): Remove hardcoded table names in Fuzzers (facebookincubat…
Browse files Browse the repository at this point in the history
…or#12120)

Summary:

Looking into the failed fuzzer job in D68359107 shows that the table name, "t_values", in the query does not match the table name of the created table, "tmp". This is because of a change made in D66977480 which updated the toSql(ValuesNode) method to return "t_<id>" instead of "tmp". That change ensured unique table names since PlanNode ids are unique within a plan.

The change in this diff deprecates hardcoding "tmp" as table names in place of using toSql. A toSql method was also added for TableScanNodes. There are many instances of this hardcoding which makes this diff a bit large.

Differential Revision: D68400743
  • Loading branch information
Daniel Hunte authored and facebook-github-bot committed Jan 21, 2025
1 parent 54c6d9e commit 51c27d7
Show file tree
Hide file tree
Showing 15 changed files with 179 additions and 211 deletions.
2 changes: 2 additions & 0 deletions velox/core/PlanNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,8 @@ class TableScanNode : public PlanNode {
assignments_;
};

using TableScanNodePtr = std::shared_ptr<const TableScanNode>;

class AggregationNode : public PlanNode {
public:
enum class Step {
Expand Down
10 changes: 5 additions & 5 deletions velox/exec/fuzzer/AggregationFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -722,7 +722,7 @@ bool AggregationFuzzer::verifyWindow(
if (!customVerification && enableWindowVerification) {
if (resultOrError.result) {
auto referenceResult =
computeReferenceResults(plan, input, referenceQueryRunner_.get());
computeReferenceResults(plan, referenceQueryRunner_.get());
stats_.updateReferenceQueryStats(referenceResult.second);
if (auto expectedResult = referenceResult.first) {
++stats_.numVerified;
Expand Down Expand Up @@ -1018,7 +1018,7 @@ void AggregationFuzzer::verifyAggregation(
std::optional<MaterializedRowMultiset> expectedResult;
if (!customVerification) {
auto referenceResult =
computeReferenceResults(plan, input, referenceQueryRunner_.get());
computeReferenceResults(plan, referenceQueryRunner_.get());
stats_.updateReferenceQueryStats(referenceResult.second);
expectedResult = referenceResult.first;
}
Expand Down Expand Up @@ -1099,8 +1099,8 @@ bool AggregationFuzzer::compareEquivalentPlanResults(

if (resultOrError.result != nullptr) {
if (!customVerification) {
auto referenceResult = computeReferenceResults(
firstPlan, input, referenceQueryRunner_.get());
auto referenceResult =
computeReferenceResults(firstPlan, referenceQueryRunner_.get());
stats_.updateReferenceQueryStats(referenceResult.second);
auto expectedResult = referenceResult.first;

Expand All @@ -1118,7 +1118,7 @@ bool AggregationFuzzer::compareEquivalentPlanResults(
if (isSupportedType(firstPlan->outputType()) &&
isSupportedType(input.front()->type())) {
auto referenceResult = computeReferenceResultsAsVector(
firstPlan, input, referenceQueryRunner_.get());
firstPlan, referenceQueryRunner_.get());
stats_.updateReferenceQueryStats(referenceResult.second);

if (referenceResult.first) {
Expand Down
35 changes: 23 additions & 12 deletions velox/exec/fuzzer/DuckQueryRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,15 +134,6 @@ DuckQueryRunner::execute(const core::PlanNodePtr& plan) {
std::nullopt, ReferenceQueryErrorCode::kReferenceQueryUnsupported);
}

std::multiset<std::vector<velox::variant>> DuckQueryRunner::execute(
const std::string& sql,
const std::vector<RowVectorPtr>& input,
const RowTypePtr& resultType) {
DuckDbQueryRunner queryRunner;
queryRunner.createTable("tmp", input);
return queryRunner.execute(sql, resultType);
}

std::optional<std::string> DuckQueryRunner::toSql(
const core::PlanNodePtr& plan) {
if (!isSupportedType(plan->outputType())) {
Expand Down Expand Up @@ -190,6 +181,11 @@ std::optional<std::string> DuckQueryRunner::toSql(
return toSql(valuesNode);
}

if (const auto tableScanNode =
std::dynamic_pointer_cast<const core::TableScanNode>(plan)) {
return toSql(tableScanNode);
}

VELOX_NYI();
}

Expand Down Expand Up @@ -254,7 +250,12 @@ std::optional<std::string> DuckQueryRunner::toSql(
}
}

sql << " FROM tmp";
// AggregationNode should have a single source.
std::optional<std::string> source = toSql(aggregationNode->sources()[0]);
if (!source) {
return std::nullopt;
}
sql << " FROM " << *source;

if (!groupingKeys.empty()) {
sql << " GROUP BY " << folly::join(", ", groupingKeys);
Expand Down Expand Up @@ -335,7 +336,12 @@ std::optional<std::string> DuckQueryRunner::toSql(
}
}

sql << ") FROM tmp";
// WindowNode should have a single source.
std::optional<std::string> source = toSql(windowNode->sources()[0]);
if (!source) {
return std::nullopt;
}
sql << ") FROM " << *source;

return sql.str();
}
Expand All @@ -362,7 +368,12 @@ std::optional<std::string> DuckQueryRunner::toSql(
}
}

sql << ") as row_number FROM tmp";
// RowNumberNode should have a single source.
std::optional<std::string> source = toSql(rowNumberNode->sources()[0]);
if (!source) {
return std::nullopt;
}
sql << ") as row_number FROM " << *source;

return sql.str();
}
Expand Down
13 changes: 4 additions & 9 deletions velox/exec/fuzzer/DuckQueryRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,20 +50,15 @@ class DuckQueryRunner : public ReferenceQueryRunner {
/// Assumes that source of AggregationNode or Window Node is 'tmp' table.
std::optional<std::string> toSql(const core::PlanNodePtr& plan) override;

/// Executes the plan and returns the result along with success or fail error
/// code.
// Converts 'plan' into an SQL query and executes it. Result is returned as a
// MaterializedRowMultiset with the ReferenceQueryErrorCode::kSuccess if
// successful, or an std::nullopt with a ReferenceQueryErrorCode if the query
// fails.
std::pair<
std::optional<std::multiset<std::vector<velox::variant>>>,
ReferenceQueryErrorCode>
execute(const core::PlanNodePtr& plan) override;

/// Creates 'tmp' table with 'input' data and runs 'sql' query. Returns
/// results according to 'resultType' schema.
std::multiset<std::vector<velox::variant>> execute(
const std::string& sql,
const std::vector<RowVectorPtr>& input,
const RowTypePtr& resultType) override;

private:
using ReferenceQueryRunner::toSql;

Expand Down
38 changes: 2 additions & 36 deletions velox/exec/fuzzer/FuzzerUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -381,48 +381,14 @@ void registerHiveConnector(
std::pair<std::optional<MaterializedRowMultiset>, ReferenceQueryErrorCode>
computeReferenceResults(
const core::PlanNodePtr& plan,
const std::vector<RowVectorPtr>& input,
ReferenceQueryRunner* referenceQueryRunner) {
if (auto sql = referenceQueryRunner->toSql(plan)) {
try {
return std::make_pair(
referenceQueryRunner->execute(sql.value(), input, plan->outputType()),
ReferenceQueryErrorCode::kSuccess);
} catch (...) {
LOG(WARNING) << "Query failed in the reference DB";
return std::make_pair(
std::nullopt, ReferenceQueryErrorCode::kReferenceQueryFail);
}
}

LOG(INFO) << "Query not supported by the reference DB";
return std::make_pair(
std::nullopt, ReferenceQueryErrorCode::kReferenceQueryUnsupported);
return referenceQueryRunner->execute(plan);
}

std::pair<std::optional<std::vector<RowVectorPtr>>, ReferenceQueryErrorCode>
computeReferenceResultsAsVector(
const core::PlanNodePtr& plan,
const std::vector<RowVectorPtr>& input,
ReferenceQueryRunner* referenceQueryRunner) {
VELOX_CHECK(referenceQueryRunner->supportsVeloxVectorResults());

if (auto sql = referenceQueryRunner->toSql(plan)) {
try {
return std::make_pair(
referenceQueryRunner->executeVector(
sql.value(), input, plan->outputType()),
ReferenceQueryErrorCode::kSuccess);
} catch (...) {
LOG(WARNING) << "Query failed in the reference DB";
return std::make_pair(
std::nullopt, ReferenceQueryErrorCode::kReferenceQueryFail);
}
} else {
LOG(INFO) << "Query not supported by the reference DB";
}

return std::make_pair(
std::nullopt, ReferenceQueryErrorCode::kReferenceQueryUnsupported);
return referenceQueryRunner->executeAndReturnVector(plan);
}
} // namespace facebook::velox::exec::test
4 changes: 1 addition & 3 deletions velox/exec/fuzzer/FuzzerUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,14 +131,13 @@ void setupMemory(
void registerHiveConnector(
const std::unordered_map<std::string, std::string>& hiveConfigs);

// Converts 'plan' into an SQL query and runs it on 'input' in the reference DB.
// Converts 'plan' into an SQL query and runs in the reference DB.
// Result is returned as a MaterializedRowMultiset with the
// ReferenceQueryErrorCode::kSuccess if successful, or an std::nullopt with a
// ReferenceQueryErrorCode if the query fails.
std::pair<std::optional<MaterializedRowMultiset>, ReferenceQueryErrorCode>
computeReferenceResults(
const core::PlanNodePtr& plan,
const std::vector<RowVectorPtr>& input,
ReferenceQueryRunner* referenceQueryRunner);

// Similar to computeReferenceResults(), but returns the result as a
Expand All @@ -147,7 +146,6 @@ computeReferenceResults(
std::pair<std::optional<std::vector<RowVectorPtr>>, ReferenceQueryErrorCode>
computeReferenceResultsAsVector(
const core::PlanNodePtr& plan,
const std::vector<RowVectorPtr>& input,
ReferenceQueryRunner* referenceQueryRunner);

} // namespace facebook::velox::exec::test
Loading

0 comments on commit 51c27d7

Please sign in to comment.