From 51c27d70c7f414bac6e94c6b539bd0024d5ba75e Mon Sep 17 00:00:00 2001 From: Daniel Hunte Date: Tue, 21 Jan 2025 09:20:20 -0800 Subject: [PATCH] fix(fuzzer): Remove hardcoded table names in Fuzzers (#12120) Summary: Looking into the failed fuzzer job in D68359107 shows that the table name, "t_values", in the query does not match the table name of the created table, "tmp". This is because of a change made in D66977480 which updated the toSql(ValuesNode) method to return "t_<id>" instead of "tmp". That change ensured unique table names since PlanNode ids are unique within a plan. The change in this diff deprecates hardcoding "tmp" as table names in favor of using toSql. A toSql method was also added for TableScanNodes. There are many instances of this hardcoding which makes this diff a bit large. Differential Revision: D68400743 --- velox/core/PlanNode.h | 2 + velox/exec/fuzzer/AggregationFuzzer.cpp | 10 +- velox/exec/fuzzer/DuckQueryRunner.cpp | 35 ++-- velox/exec/fuzzer/DuckQueryRunner.h | 13 +- velox/exec/fuzzer/FuzzerUtil.cpp | 38 +---- velox/exec/fuzzer/FuzzerUtil.h | 4 +- velox/exec/fuzzer/PrestoQueryRunner.cpp | 150 +++++++++--------- velox/exec/fuzzer/PrestoQueryRunner.h | 37 ++--- velox/exec/fuzzer/ReferenceQueryRunner.cpp | 5 + velox/exec/fuzzer/ReferenceQueryRunner.h | 33 +++- velox/exec/fuzzer/RowNumberFuzzer.cpp | 9 +- velox/exec/fuzzer/WindowFuzzer.cpp | 2 +- velox/exec/fuzzer/WriterFuzzer.cpp | 19 +-- velox/exec/tests/PrestoQueryRunnerTest.cpp | 29 ++-- velox/expression/tests/ExpressionVerifier.cpp | 4 +- 15 files changed, 179 insertions(+), 211 deletions(-) diff --git a/velox/core/PlanNode.h b/velox/core/PlanNode.h index f9970f3f22675..ad69363cf29fd 100644 --- a/velox/core/PlanNode.h +++ b/velox/core/PlanNode.h @@ -589,6 +589,8 @@ class TableScanNode : public PlanNode { assignments_; }; +using TableScanNodePtr = std::shared_ptr; + class AggregationNode : public PlanNode { public: enum class Step { diff --git a/velox/exec/fuzzer/AggregationFuzzer.cpp 
b/velox/exec/fuzzer/AggregationFuzzer.cpp index d4cde4c8957a2..2b79320698be3 100644 --- a/velox/exec/fuzzer/AggregationFuzzer.cpp +++ b/velox/exec/fuzzer/AggregationFuzzer.cpp @@ -722,7 +722,7 @@ bool AggregationFuzzer::verifyWindow( if (!customVerification && enableWindowVerification) { if (resultOrError.result) { auto referenceResult = - computeReferenceResults(plan, input, referenceQueryRunner_.get()); + computeReferenceResults(plan, referenceQueryRunner_.get()); stats_.updateReferenceQueryStats(referenceResult.second); if (auto expectedResult = referenceResult.first) { ++stats_.numVerified; @@ -1018,7 +1018,7 @@ void AggregationFuzzer::verifyAggregation( std::optional expectedResult; if (!customVerification) { auto referenceResult = - computeReferenceResults(plan, input, referenceQueryRunner_.get()); + computeReferenceResults(plan, referenceQueryRunner_.get()); stats_.updateReferenceQueryStats(referenceResult.second); expectedResult = referenceResult.first; } @@ -1099,8 +1099,8 @@ bool AggregationFuzzer::compareEquivalentPlanResults( if (resultOrError.result != nullptr) { if (!customVerification) { - auto referenceResult = computeReferenceResults( - firstPlan, input, referenceQueryRunner_.get()); + auto referenceResult = + computeReferenceResults(firstPlan, referenceQueryRunner_.get()); stats_.updateReferenceQueryStats(referenceResult.second); auto expectedResult = referenceResult.first; @@ -1118,7 +1118,7 @@ bool AggregationFuzzer::compareEquivalentPlanResults( if (isSupportedType(firstPlan->outputType()) && isSupportedType(input.front()->type())) { auto referenceResult = computeReferenceResultsAsVector( - firstPlan, input, referenceQueryRunner_.get()); + firstPlan, referenceQueryRunner_.get()); stats_.updateReferenceQueryStats(referenceResult.second); if (referenceResult.first) { diff --git a/velox/exec/fuzzer/DuckQueryRunner.cpp b/velox/exec/fuzzer/DuckQueryRunner.cpp index 6d027300056f4..733675fec7eb1 100644 --- a/velox/exec/fuzzer/DuckQueryRunner.cpp +++ 
b/velox/exec/fuzzer/DuckQueryRunner.cpp @@ -134,15 +134,6 @@ DuckQueryRunner::execute(const core::PlanNodePtr& plan) { std::nullopt, ReferenceQueryErrorCode::kReferenceQueryUnsupported); } -std::multiset> DuckQueryRunner::execute( - const std::string& sql, - const std::vector& input, - const RowTypePtr& resultType) { - DuckDbQueryRunner queryRunner; - queryRunner.createTable("tmp", input); - return queryRunner.execute(sql, resultType); -} - std::optional DuckQueryRunner::toSql( const core::PlanNodePtr& plan) { if (!isSupportedType(plan->outputType())) { @@ -190,6 +181,11 @@ std::optional DuckQueryRunner::toSql( return toSql(valuesNode); } + if (const auto tableScanNode = + std::dynamic_pointer_cast(plan)) { + return toSql(tableScanNode); + } + VELOX_NYI(); } @@ -254,7 +250,12 @@ std::optional DuckQueryRunner::toSql( } } - sql << " FROM tmp"; + // AggregationNode should have a single source. + std::optional source = toSql(aggregationNode->sources()[0]); + if (!source) { + return std::nullopt; + } + sql << " FROM " << *source; if (!groupingKeys.empty()) { sql << " GROUP BY " << folly::join(", ", groupingKeys); @@ -335,7 +336,12 @@ std::optional DuckQueryRunner::toSql( } } - sql << ") FROM tmp"; + // WindowNode should have a single source. + std::optional source = toSql(windowNode->sources()[0]); + if (!source) { + return std::nullopt; + } + sql << ") FROM " << *source; return sql.str(); } @@ -362,7 +368,12 @@ std::optional DuckQueryRunner::toSql( } } - sql << ") as row_number FROM tmp"; + // RowNumberNode should have a single source. 
+ std::optional source = toSql(rowNumberNode->sources()[0]); + if (!source) { + return std::nullopt; + } + sql << ") as row_number FROM " << *source; return sql.str(); } diff --git a/velox/exec/fuzzer/DuckQueryRunner.h b/velox/exec/fuzzer/DuckQueryRunner.h index 44a8275fd082e..3389d1ac3f4b9 100644 --- a/velox/exec/fuzzer/DuckQueryRunner.h +++ b/velox/exec/fuzzer/DuckQueryRunner.h @@ -50,20 +50,15 @@ class DuckQueryRunner : public ReferenceQueryRunner { /// Assumes that source of AggregationNode or Window Node is 'tmp' table. std::optional toSql(const core::PlanNodePtr& plan) override; - /// Executes the plan and returns the result along with success or fail error - /// code. + // Converts 'plan' into an SQL query and executes it. Result is returned as a + // MaterializedRowMultiset with the ReferenceQueryErrorCode::kSuccess if + // successful, or an std::nullopt with a ReferenceQueryErrorCode if the query + // fails. std::pair< std::optional>>, ReferenceQueryErrorCode> execute(const core::PlanNodePtr& plan) override; - /// Creates 'tmp' table with 'input' data and runs 'sql' query. Returns - /// results according to 'resultType' schema. - std::multiset> execute( - const std::string& sql, - const std::vector& input, - const RowTypePtr& resultType) override; - private: using ReferenceQueryRunner::toSql; diff --git a/velox/exec/fuzzer/FuzzerUtil.cpp b/velox/exec/fuzzer/FuzzerUtil.cpp index 91416814e9313..dd3b86ddc726d 100644 --- a/velox/exec/fuzzer/FuzzerUtil.cpp +++ b/velox/exec/fuzzer/FuzzerUtil.cpp @@ -381,48 +381,14 @@ void registerHiveConnector( std::pair, ReferenceQueryErrorCode> computeReferenceResults( const core::PlanNodePtr& plan, - const std::vector& input, ReferenceQueryRunner* referenceQueryRunner) { - if (auto sql = referenceQueryRunner->toSql(plan)) { - try { - return std::make_pair( - referenceQueryRunner->execute(sql.value(), input, plan->outputType()), - ReferenceQueryErrorCode::kSuccess); - } catch (...) 
{ - LOG(WARNING) << "Query failed in the reference DB"; - return std::make_pair( - std::nullopt, ReferenceQueryErrorCode::kReferenceQueryFail); - } - } - - LOG(INFO) << "Query not supported by the reference DB"; - return std::make_pair( - std::nullopt, ReferenceQueryErrorCode::kReferenceQueryUnsupported); + return referenceQueryRunner->execute(plan); } std::pair>, ReferenceQueryErrorCode> computeReferenceResultsAsVector( const core::PlanNodePtr& plan, - const std::vector& input, ReferenceQueryRunner* referenceQueryRunner) { - VELOX_CHECK(referenceQueryRunner->supportsVeloxVectorResults()); - - if (auto sql = referenceQueryRunner->toSql(plan)) { - try { - return std::make_pair( - referenceQueryRunner->executeVector( - sql.value(), input, plan->outputType()), - ReferenceQueryErrorCode::kSuccess); - } catch (...) { - LOG(WARNING) << "Query failed in the reference DB"; - return std::make_pair( - std::nullopt, ReferenceQueryErrorCode::kReferenceQueryFail); - } - } else { - LOG(INFO) << "Query not supported by the reference DB"; - } - - return std::make_pair( - std::nullopt, ReferenceQueryErrorCode::kReferenceQueryUnsupported); + return referenceQueryRunner->executeAndReturnVector(plan); } } // namespace facebook::velox::exec::test diff --git a/velox/exec/fuzzer/FuzzerUtil.h b/velox/exec/fuzzer/FuzzerUtil.h index 172ac8b9c44ed..108f190efed1b 100644 --- a/velox/exec/fuzzer/FuzzerUtil.h +++ b/velox/exec/fuzzer/FuzzerUtil.h @@ -131,14 +131,13 @@ void setupMemory( void registerHiveConnector( const std::unordered_map& hiveConfigs); -// Converts 'plan' into an SQL query and runs it on 'input' in the reference DB. +// Converts 'plan' into an SQL query and runs in the reference DB. // Result is returned as a MaterializedRowMultiset with the // ReferenceQueryErrorCode::kSuccess if successful, or an std::nullopt with a // ReferenceQueryErrorCode if the query fails. 
std::pair, ReferenceQueryErrorCode> computeReferenceResults( const core::PlanNodePtr& plan, - const std::vector& input, ReferenceQueryRunner* referenceQueryRunner); // Similar to computeReferenceResults(), but returns the result as a @@ -147,7 +146,6 @@ computeReferenceResults( std::pair>, ReferenceQueryErrorCode> computeReferenceResultsAsVector( const core::PlanNodePtr& plan, - const std::vector& input, ReferenceQueryRunner* referenceQueryRunner); } // namespace facebook::velox::exec::test diff --git a/velox/exec/fuzzer/PrestoQueryRunner.cpp b/velox/exec/fuzzer/PrestoQueryRunner.cpp index b3fabcf6ed0d0..2e9375db2c3c3 100644 --- a/velox/exec/fuzzer/PrestoQueryRunner.cpp +++ b/velox/exec/fuzzer/PrestoQueryRunner.cpp @@ -203,6 +203,11 @@ std::optional PrestoQueryRunner::toSql( return toSql(valuesNode); } + if (const auto tableScanNode = + std::dynamic_pointer_cast(plan)) { + return toSql(tableScanNode); + } + VELOX_NYI(); } @@ -300,8 +305,12 @@ std::optional PrestoQueryRunner::toSql( sql << " as " << aggregationNode->aggregateNames()[i]; } } - - sql << " FROM tmp"; + // AggregationNode should have a single source. + std::optional source = toSql(aggregationNode->sources()[0]); + if (!source) { + return std::nullopt; + } + sql << " FROM " << *source; if (!groupingKeys.empty()) { sql << " GROUP BY " << folly::join(", ", groupingKeys); @@ -403,7 +412,12 @@ std::optional PrestoQueryRunner::toSql( sql << ")"; } - sql << " FROM tmp"; + // WindowNode should have a single source. + std::optional source = toSql(windowNode->sources()[0]); + if (!source) { + return std::nullopt; + } + sql << " FROM " << *source; return sql.str(); } @@ -474,7 +488,12 @@ std::optional PrestoQueryRunner::toSql( } } - sql << ") as row_number FROM tmp"; + // RowNumberNode should have a single source. 
+ std::optional source = toSql(rowNumberNode->sources()[0]); + if (!source) { + return std::nullopt; + } + sql << ") as row_number FROM " << *source; return sql.str(); } @@ -493,7 +512,7 @@ std::optional PrestoQueryRunner::toSql( // SORTED_BY = ARRAY['s0 ASC', 's1 DESC'], // FORMAT = 'ORC' // ) - // AS SELECT * FROM tmp + // AS SELECT * FROM t_ std::stringstream sql; sql << "CREATE TABLE tmp_write"; std::vector partitionKeys; @@ -538,7 +557,13 @@ std::optional PrestoQueryRunner::toSql( } } - sql << "FORMAT = 'ORC') AS SELECT * FROM tmp"; + // TableWriteNode should have a single source. + std::optional source = toSql(tableWriteNode->sources()[0]); + if (!source) { + return std::nullopt; + } + sql << "FORMAT = 'ORC') AS SELECT * FROM " << *source; + return sql.str(); } @@ -546,28 +571,15 @@ std::pair< std::optional>>, ReferenceQueryErrorCode> PrestoQueryRunner::execute(const core::PlanNodePtr& plan) { - if (std::optional sql = toSql(plan)) { - try { - return std::make_pair( - exec::test::materialize(executeAndReturnVector(*sql, plan)), - ReferenceQueryErrorCode::kSuccess); - } catch (...) 
{ - LOG(WARNING) << "Query failed in Presto"; - return std::make_pair( - std::nullopt, ReferenceQueryErrorCode::kReferenceQueryFail); - } - } - - LOG(INFO) << "Query not supported in Presto"; - return std::make_pair( - std::nullopt, ReferenceQueryErrorCode::kReferenceQueryUnsupported); -} - -std::multiset> PrestoQueryRunner::execute( - const std::string& sql, - const std::vector& input, - const RowTypePtr& resultType) { - return exec::test::materialize(executeVector(sql, input, resultType)); + std::pair< + std::optional>, + ReferenceQueryErrorCode> + result = executeAndReturnVector(plan); + if (result.first) { + return std::make_pair( + exec::test::materialize(*result.first), result.second); + } + return std::make_pair(std::nullopt, result.second); } std::string PrestoQueryRunner::createTable( @@ -601,59 +613,47 @@ std::string PrestoQueryRunner::createTable( return tableDirectoryPath; } -std::vector PrestoQueryRunner::executeAndReturnVector( - const std::string& sql, - const core::PlanNodePtr& plan) { - std::unordered_map> inputMap = - getAllTables(plan); - for (const auto& [tableName, input] : inputMap) { - auto inputType = asRowType(input[0]->type()); - if (inputType->size() == 0) { - inputMap[tableName] = { - makeNullRows(input, fmt::format("{}x", tableName), pool())}; - } - } - - auto writerPool = aggregatePool()->addAggregateChild("writer"); - for (const auto& [tableName, input] : inputMap) { - auto tableDirectoryPath = createTable(tableName, input[0]->type()); +std::pair< + std::optional>, + ReferenceQueryErrorCode> +PrestoQueryRunner::executeAndReturnVector(const core::PlanNodePtr& plan) { + if (std::optional sql = toSql(plan)) { + try { + std::unordered_map> + inputMap = getAllTables(plan); + for (const auto& [tableName, input] : inputMap) { + auto inputType = asRowType(input[0]->type()); + if (inputType->size() == 0) { + inputMap[tableName] = { + makeNullRows(input, fmt::format("{}x", tableName), pool())}; + } + } - // Create a new file in table's 
directory with fuzzer-generated data. - auto filePath = fs::path(tableDirectoryPath) - .append(fmt::format("{}.dwrf", tableName)) - .string() - .substr(strlen("file:")); + auto writerPool = aggregatePool()->addAggregateChild("writer"); + for (const auto& [tableName, input] : inputMap) { + auto tableDirectoryPath = createTable(tableName, input[0]->type()); - writeToFile(filePath, input, writerPool.get()); - } + // Create a new file in table's directory with fuzzer-generated data. + auto filePath = fs::path(tableDirectoryPath) + .append(fmt::format("{}.dwrf", tableName)) + .string() + .substr(strlen("file:")); - // Run the query. - return execute(sql); -} + writeToFile(filePath, input, writerPool.get()); + } -std::vector PrestoQueryRunner::executeVector( - const std::string& sql, - const std::vector& input, - const velox::RowTypePtr& resultType) { - auto inputType = asRowType(input[0]->type()); - if (inputType->size() == 0) { - auto rowVector = makeNullRows(input, "x", pool()); - return executeVector(sql, {rowVector}, resultType); + // Run the query. + return std::make_pair(execute(*sql), ReferenceQueryErrorCode::kSuccess); + } catch (...) { + LOG(WARNING) << "Query failed in Presto"; + return std::make_pair( + std::nullopt, ReferenceQueryErrorCode::kReferenceQueryFail); + } } - auto tableDirectoryPath = createTable("tmp", input[0]->type()); - - // Create a new file in table's directory with fuzzer-generated data. - auto newFilePath = fs::path(tableDirectoryPath) - .append("fuzzer.dwrf") - .string() - .substr(strlen("file:")); - - auto writerPool = aggregatePool()->addAggregateChild("writer"); - writeToFile(newFilePath, input, writerPool.get()); - - // Run the query. 
- return execute(sql); + LOG(INFO) << "Query not supported in Presto"; + return std::make_pair( + std::nullopt, ReferenceQueryErrorCode::kReferenceQueryUnsupported); } std::vector PrestoQueryRunner::execute(const std::string& sql) { diff --git a/velox/exec/fuzzer/PrestoQueryRunner.h b/velox/exec/fuzzer/PrestoQueryRunner.h index 7f74f794e5039..ca6083c7a099e 100644 --- a/velox/exec/fuzzer/PrestoQueryRunner.h +++ b/velox/exec/fuzzer/PrestoQueryRunner.h @@ -69,26 +69,22 @@ class PrestoQueryRunner : public velox::exec::test::ReferenceQueryRunner { bool isSupported(const exec::FunctionSignature& signature) override; - /// Creates 'tmp' table using specified data, executes SQL query generated by - /// 'toSql' and returns the results. - /// - /// @param sql SQL generated by 'toSql' method. - /// @param input Data used in the Values node in the plan passed to 'toSql' - /// method. - /// @param resultType Expected type of the results. - /// @return Data received from Presto. - std::multiset> execute( - const std::string& sql, - const std::vector& input, - const velox::RowTypePtr& resultType) override; - - /// Executes the plan and returns the result along with success or fail error - /// code. + // Converts 'plan' into an SQL query and executes it. Result is returned as a + // MaterializedRowMultiset with the ReferenceQueryErrorCode::kSuccess if + // successful, or an std::nullopt with a ReferenceQueryErrorCode if the query + // fails. std::pair< std::optional>>, ReferenceQueryErrorCode> execute(const core::PlanNodePtr& plan) override; + /// Similar to 'execute' but returns results in RowVector format. + /// Caller should ensure 'supportsVeloxVectorResults' returns true. + std::pair< + std::optional>, + ReferenceQueryErrorCode> + executeAndReturnVector(const core::PlanNodePtr& plan) override; + /// Executes Presto SQL query and returns the results. Tables referenced by /// the query must already exist. 
std::vector execute(const std::string& sql) override; @@ -100,11 +96,6 @@ class PrestoQueryRunner : public velox::exec::test::ReferenceQueryRunner { bool supportsVeloxVectorResults() const override; - std::vector executeVector( - const std::string& sql, - const std::vector& input, - const RowTypePtr& resultType) override; - std::shared_ptr queryRunnerContext() { return queryRunnerContext_; } @@ -132,12 +123,6 @@ class PrestoQueryRunner : public velox::exec::test::ReferenceQueryRunner { std::optional toSql( const std::shared_ptr& tableWriteNode); - /// Executes SQL query returned by the 'toSql' method based on the plan. - /// Returns std::nullopt if the plan is not supported. - std::vector executeAndReturnVector( - const std::string& sql, - const core::PlanNodePtr& plan); - std::string startQuery( const std::string& sql, const std::string& sessionProperty = ""); diff --git a/velox/exec/fuzzer/ReferenceQueryRunner.cpp b/velox/exec/fuzzer/ReferenceQueryRunner.cpp index 6dcf7540e5ef6..65395fe718a4d 100644 --- a/velox/exec/fuzzer/ReferenceQueryRunner.cpp +++ b/velox/exec/fuzzer/ReferenceQueryRunner.cpp @@ -108,6 +108,11 @@ std::optional ReferenceQueryRunner::toSql( return getTableName(valuesNode); } +std::optional ReferenceQueryRunner::toSql( + const core::TableScanNodePtr& tableScanNode) { + return tableScanNode->tableHandle()->name(); +} + std::optional ReferenceQueryRunner::toSql( const std::shared_ptr& joinNode) { if (!isSupportedDwrfType(joinNode->sources()[0]->outputType()) || diff --git a/velox/exec/fuzzer/ReferenceQueryRunner.h b/velox/exec/fuzzer/ReferenceQueryRunner.h index 5f0c74e5b9228..e17b184907a64 100644 --- a/velox/exec/fuzzer/ReferenceQueryRunner.h +++ b/velox/exec/fuzzer/ReferenceQueryRunner.h @@ -67,6 +67,10 @@ class ReferenceQueryRunner { virtual std::optional toSql( const core::ValuesNodePtr& valuesNode); + /// Same as the above toSql but for table scan nodes. 
+ virtual std::optional toSql( + const core::TableScanNodePtr& tableScanNode); + /// Same as the above toSql but for hash join nodes. virtual std::optional toSql( const std::shared_ptr& joinNode); @@ -87,8 +91,19 @@ class ReferenceQueryRunner { return true; } - /// Executes the plan and returns the result along with success or fail error - /// code. + /// Executes SQL query returned by the 'toSql' method using 'input' data. + /// Converts results using 'resultType' schema. + virtual std::multiset> execute( + const std::string& /*sql*/, + const std::vector& /*input*/, + const velox::RowTypePtr& /*resultType*/) { + VELOX_UNSUPPORTED(); + } + + // Converts 'plan' into an SQL query and executes it. Result is returned as a + // MaterializedRowMultiset with the ReferenceQueryErrorCode::kSuccess if + // successful, or an std::nullopt with a ReferenceQueryErrorCode if the query + // fails. virtual std::pair< std::optional>>, ReferenceQueryErrorCode> @@ -96,12 +111,14 @@ class ReferenceQueryRunner { VELOX_UNSUPPORTED(); } - /// Executes SQL query returned by the 'toSql' method using 'input' data. - /// Converts results using 'resultType' schema. - virtual std::multiset> execute( - const std::string& sql, - const std::vector& input, - const RowTypePtr& resultType) = 0; + /// Similar to 'execute' but returns results in RowVector format. + /// Caller should ensure 'supportsVeloxVectorResults' returns true. + virtual std::pair< + std::optional>, + ReferenceQueryErrorCode> + executeAndReturnVector(const core::PlanNodePtr& /*plan*/) { + VELOX_UNSUPPORTED(); + } /// Executes SQL query returned by the 'toSql' method using 'probeInput' and /// 'buildInput' data for join node. 
diff --git a/velox/exec/fuzzer/RowNumberFuzzer.cpp b/velox/exec/fuzzer/RowNumberFuzzer.cpp index e5fe376686778..9622e34b98087 100644 --- a/velox/exec/fuzzer/RowNumberFuzzer.cpp +++ b/velox/exec/fuzzer/RowNumberFuzzer.cpp @@ -280,14 +280,7 @@ RowNumberFuzzer::computeReferenceResults( if (test::containsUnsupportedTypes(input[0]->type())) { return std::nullopt; } - - if (auto sql = referenceQueryRunner_->toSql(plan)) { - return referenceQueryRunner_->execute( - sql.value(), input, plan->outputType()); - } - - LOG(INFO) << "Query not supported by the reference DB"; - return std::nullopt; + return referenceQueryRunner_->execute(plan).first; } RowVectorPtr RowNumberFuzzer::execute( diff --git a/velox/exec/fuzzer/WindowFuzzer.cpp b/velox/exec/fuzzer/WindowFuzzer.cpp index 8c1bebbf72402..6a3b8ff587c58 100644 --- a/velox/exec/fuzzer/WindowFuzzer.cpp +++ b/velox/exec/fuzzer/WindowFuzzer.cpp @@ -725,7 +725,7 @@ bool WindowFuzzer::verifyWindow( .push_back(prestoFrameClause); } auto referenceResult = - computeReferenceResults(plan, input, referenceQueryRunner_.get()); + computeReferenceResults(plan, referenceQueryRunner_.get()); if (isPrestoQueryRunner) { prestoQueryRunner->queryRunnerContext()->windowFrames_.clear(); } diff --git a/velox/exec/fuzzer/WriterFuzzer.cpp b/velox/exec/fuzzer/WriterFuzzer.cpp index d3d60e49aaf21..b59f9e5678c3b 100644 --- a/velox/exec/fuzzer/WriterFuzzer.cpp +++ b/velox/exec/fuzzer/WriterFuzzer.cpp @@ -481,26 +481,21 @@ void WriterFuzzer::verifyWriter( return; } - const auto dropSql = "DROP TABLE IF EXISTS tmp_write"; - const auto sql = referenceQueryRunner_->toSql(plan).value(); - std::multiset> expectedResult; - try { - referenceQueryRunner_->execute(dropSql); - expectedResult = - referenceQueryRunner_->execute(sql, input, plan->outputType()); - } catch (...) 
{ - LOG(WARNING) << "Query failed in the reference DB"; + referenceQueryRunner_->execute("DROP TABLE IF EXISTS tmp_write"); + std::optional>> expectedResult = + referenceQueryRunner_->execute(plan).first; + if (!expectedResult.has_value()) { return; } // 1. Verifies the table writer output result: the inserted number of rows. VELOX_CHECK_EQ( - expectedResult.size(), // Presto sql only produces one row which is how - // many rows are inserted. + expectedResult->size(), // Presto sql only produces one row which is + // how many rows are inserted. 1, "Query returned unexpected result in the reference DB"); VELOX_CHECK( - assertEqualResults(expectedResult, plan->outputType(), {result}), + assertEqualResults(*expectedResult, plan->outputType(), {result}), "Velox and reference DB results don't match"); // 2. Verifies directory layout for partitioned (bucketed) table. diff --git a/velox/exec/tests/PrestoQueryRunnerTest.cpp b/velox/exec/tests/PrestoQueryRunnerTest.cpp index 14447f5eb3967..bb0260b58f315 100644 --- a/velox/exec/tests/PrestoQueryRunnerTest.cpp +++ b/velox/exec/tests/PrestoQueryRunnerTest.cpp @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include "velox/common/base/tests/GTestUtils.h" @@ -98,18 +99,15 @@ TEST_F(PrestoQueryRunnerTest, DISABLED_fuzzer) { "http://127.0.0.1:8080", "hive", static_cast(1000)); - auto sql = queryRunner->toSql(plan); - ASSERT_TRUE(sql.has_value()); - auto prestoResults = queryRunner->execute( - sql.value(), - {data}, - ROW({"a", "b", "c"}, {BIGINT(), BIGINT(), ARRAY(BIGINT())})); + std::optional>> prestoResults = + queryRunner->execute(plan).first; + ASSERT_TRUE(prestoResults.has_value()); auto veloxResults = velox::exec::test::AssertQueryBuilder(plan).copyResults(pool()); velox::exec::test::assertEqualResults( - prestoResults, plan->outputType(), {veloxResults}); + *prestoResults, plan->outputType(), {veloxResults}); } TEST_F(PrestoQueryRunnerTest, sortedAggregation) { @@ -135,9 +133,10 @@ TEST_F(PrestoQueryRunnerTest, sortedAggregation) { auto sql = queryRunner->toSql(plan); ASSERT_TRUE(sql.has_value()); - ASSERT_EQ( - "SELECT multimap_agg(c0, c1 ORDER BY c0 ASC NULLS LAST) as a0 FROM tmp", - sql.value()); + ASSERT_THAT( + *sql, + ::testing::HasSubstr( + "SELECT multimap_agg(c0, c1 ORDER BY c0 ASC NULLS LAST) as a0 FROM ")); // Plans with multiple order by's in the aggregate. 
@@ -151,9 +150,10 @@ TEST_F(PrestoQueryRunnerTest, sortedAggregation) { sql = queryRunner->toSql(plan); ASSERT_TRUE(sql.has_value()); - ASSERT_EQ( - "SELECT multimap_agg(c0, c1 ORDER BY c1 ASC NULLS FIRST, c0 DESC NULLS LAST, c2 ASC NULLS LAST) as a0 FROM tmp", - sql.value()); + ASSERT_THAT( + *sql, + ::testing::HasSubstr( + "SELECT multimap_agg(c0, c1 ORDER BY c1 ASC NULLS FIRST, c0 DESC NULLS LAST, c2 ASC NULLS LAST) as a0 FROM ")); } TEST_F(PrestoQueryRunnerTest, distinctAggregation) { @@ -174,7 +174,8 @@ TEST_F(PrestoQueryRunnerTest, distinctAggregation) { auto sql = queryRunner->toSql(plan); ASSERT_TRUE(sql.has_value()); - ASSERT_EQ("SELECT array_agg(distinct c0) as a0 FROM tmp", sql.value()); + ASSERT_THAT( + *sql, ::testing::HasSubstr("SELECT array_agg(distinct c0) as a0 FROM ")); } TEST_F(PrestoQueryRunnerTest, toSql) { diff --git a/velox/expression/tests/ExpressionVerifier.cpp b/velox/expression/tests/ExpressionVerifier.cpp index 8dd7054c30788..f5c1d4829efe7 100644 --- a/velox/expression/tests/ExpressionVerifier.cpp +++ b/velox/expression/tests/ExpressionVerifier.cpp @@ -287,8 +287,8 @@ std::vector ExpressionVerifier::verify( VLOG(1) << "Execute with reference DB."; auto inputRowVector = reduceToSelectedRows(rowVector, rows); auto projectionPlan = makeProjectionPlan(inputRowVector, plans); - auto referenceResultOrError = computeReferenceResults( - projectionPlan, {inputRowVector}, referenceQueryRunner_.get()); + auto referenceResultOrError = + computeReferenceResults(projectionPlan, referenceQueryRunner_.get()); auto referenceEvalResult = referenceResultOrError.first;