diff --git a/velox/benchmarks/tpch/TpchBenchmark.cpp b/velox/benchmarks/tpch/TpchBenchmark.cpp
index fb1862ab15f3..2405714ea906 100644
--- a/velox/benchmarks/tpch/TpchBenchmark.cpp
+++ b/velox/benchmarks/tpch/TpchBenchmark.cpp
@@ -494,6 +494,11 @@ BENCHMARK(q1) {
   benchmark.run(planContext);
 }
 
+BENCHMARK(q2) {
+  const auto planContext = queryBuilder->getQueryPlan(2);
+  benchmark.run(planContext);
+}
+
 BENCHMARK(q3) {
   const auto planContext = queryBuilder->getQueryPlan(3);
   benchmark.run(planContext);
diff --git a/velox/dwio/parquet/tests/ParquetTpchTest.cpp b/velox/dwio/parquet/tests/ParquetTpchTest.cpp
index 47e406235ebe..b2a32db701ca 100644
--- a/velox/dwio/parquet/tests/ParquetTpchTest.cpp
+++ b/velox/dwio/parquet/tests/ParquetTpchTest.cpp
@@ -161,6 +161,11 @@ TEST_F(ParquetTpchTest, Q1) {
   assertQuery(1);
 }
 
+TEST_F(ParquetTpchTest, Q2) {
+  std::vector<uint32_t> sortingKeys{0, 1, 2, 3};
+  assertQuery(2, std::move(sortingKeys));
+}
+
 TEST_F(ParquetTpchTest, Q3) {
   std::vector<uint32_t> sortingKeys{1, 2};
   assertQuery(3, std::move(sortingKeys));
diff --git a/velox/exec/tests/utils/TpchQueryBuilder.cpp b/velox/exec/tests/utils/TpchQueryBuilder.cpp
index e52e4f3b66f7..3c2d6de8fe64 100644
--- a/velox/exec/tests/utils/TpchQueryBuilder.cpp
+++ b/velox/exec/tests/utils/TpchQueryBuilder.cpp
@@ -142,6 +142,8 @@ TpchPlan TpchQueryBuilder::getQueryPlan(int queryId) const {
   switch (queryId) {
     case 1:
       return getQ1Plan();
+    case 2:
+      return getQ2Plan();
     case 3:
       return getQ3Plan();
     case 5:
@@ -238,6 +240,228 @@ TpchPlan TpchQueryBuilder::getQ1Plan() const {
   return context;
 }
 
+/// Builds the plan for query 2. The plan has two branches that are joined at
+/// the end:
+///  - a "sub-query" branch computing min(ps_supplycost) per ps_partkey over
+///    partsupp rows whose supplier's nation is in the region 'EUROPE';
+///  - a main branch joining partsupp with part (p_type like '%BRASS',
+///    p_size = 15) and with the suppliers of the same region.
+/// Rows of the main branch are kept only when ps_supplycost equals the
+/// per-part minimum. The result is ordered by s_acctbal DESC, n_name, s_name,
+/// p_partkey and limited to the first 100 rows.
+/// Each table scan gets its own plan node id so that data files can be
+/// attached to it in the returned TpchPlan.
+TpchPlan TpchQueryBuilder::getQ2Plan() const {
+  std::vector<std::string> supplierColumnsSubQuery = {
+      "s_suppkey", "s_nationkey"};
+  std::vector<std::string> nationColumnsSubQuery = {
+      "n_nationkey", "n_regionkey"};
+  std::vector<std::string> supplierColumns = {
+      "s_acctbal",
+      "s_name",
+      "s_address",
+      "s_phone",
+      "s_comment",
+      "s_suppkey",
+      "s_nationkey"};
+  std::vector<std::string> partColumns = {
+      "p_partkey", "p_mfgr", "p_size", "p_type"};
+  std::vector<std::string> partsuppColumns = {
+      "ps_partkey", "ps_suppkey", "ps_supplycost"};
+  std::vector<std::string> nationColumns = {
+      "n_nationkey", "n_name", "n_regionkey"};
+  std::vector<std::string> regionColumns = {"r_regionkey", "r_name"};
+
+  auto supplierSelectedRowTypeSubQuery =
+      getRowType(kSupplier, supplierColumnsSubQuery);
+  const auto& supplierFileColumnsSubQuery = getFileColumnNames(kSupplier);
+  auto nationSelectedRowTypeSubQuery =
+      getRowType(kNation, nationColumnsSubQuery);
+  const auto& nationFileColumnsSubQuery = getFileColumnNames(kNation);
+  auto partSelectedRowType = getRowType(kPart, partColumns);
+  const auto& partFileColumns = getFileColumnNames(kPart);
+  auto supplierSelectedRowType = getRowType(kSupplier, supplierColumns);
+  const auto& supplierFileColumns = getFileColumnNames(kSupplier);
+  auto partsuppSelectedRowType = getRowType(kPartsupp, partsuppColumns);
+  const auto& partsuppFileColumns = getFileColumnNames(kPartsupp);
+  auto nationSelectedRowType = getRowType(kNation, nationColumns);
+  const auto& nationFileColumns = getFileColumnNames(kNation);
+  auto regionSelectedRowType = getRowType(kRegion, regionColumns);
+  const auto& regionFileColumns = getFileColumnNames(kRegion);
+
+  // Both branches restrict the region scan with the same filter.
+  const std::string regionNameFilter = "r_name = 'EUROPE'";
+
+  // NOTE(review): the generator type is assumed to be
+  // core::PlanNodeIdGenerator; confirm against the PlanBuilder in this tree.
+  auto planNodeIdGenerator = std::make_shared<core::PlanNodeIdGenerator>();
+  core::PlanNodeId supplierScanIdSubQuery;
+  core::PlanNodeId partsuppScanIdSubQuery;
+  core::PlanNodeId nationScanIdSubQuery;
+  core::PlanNodeId regionScanIdSubQuery;
+  core::PlanNodeId partScanId;
+  core::PlanNodeId supplierScanId;
+  core::PlanNodeId partsuppScanId;
+  core::PlanNodeId nationScanId;
+  core::PlanNodeId regionScanId;
+
+  // Sub-query branch: keys of the suppliers located in the filtered region.
+  auto regionSubQuery = PlanBuilder(planNodeIdGenerator)
+                            .tableScan(
+                                kRegion,
+                                regionSelectedRowType,
+                                regionFileColumns,
+                                {regionNameFilter})
+                            .capturePlanNodeId(regionScanIdSubQuery)
+                            .planNode();
+
+  auto nationJoinRegionSubQuery =
+      PlanBuilder(planNodeIdGenerator)
+          .tableScan(
+              kNation, nationSelectedRowTypeSubQuery, nationFileColumnsSubQuery)
+          .capturePlanNodeId(nationScanIdSubQuery)
+          .hashJoin(
+              {"n_regionkey"},
+              {"r_regionkey"},
+              regionSubQuery,
+              "",
+              {"n_nationkey"})
+          .planNode();
+
+  auto supplierJoinNationJoinRegionSubQuery =
+      PlanBuilder(planNodeIdGenerator)
+          .tableScan(
+              kSupplier,
+              supplierSelectedRowTypeSubQuery,
+              supplierFileColumnsSubQuery)
+          .capturePlanNodeId(supplierScanIdSubQuery)
+          .hashJoin(
+              {"s_nationkey"},
+              {"n_nationkey"},
+              nationJoinRegionSubQuery,
+              "",
+              {"s_suppkey"})
+          .planNode();
+
+  // Main branch: parts restricted by type and size.
+  auto part = PlanBuilder(planNodeIdGenerator)
+                  .tableScan(
+                      kPart,
+                      partSelectedRowType,
+                      partFileColumns,
+                      {},
+                      "p_type like '%BRASS'")
+                  .capturePlanNodeId(partScanId)
+                  .filter("p_size = 15")
+                  .planNode();
+
+  auto region = PlanBuilder(planNodeIdGenerator)
+                    .tableScan(
+                        kRegion,
+                        regionSelectedRowType,
+                        regionFileColumns,
+                        {regionNameFilter})
+                    .capturePlanNodeId(regionScanId)
+                    .planNode();
+
+  auto nationJoinRegion =
+      PlanBuilder(planNodeIdGenerator)
+          .tableScan(kNation, nationSelectedRowType, nationFileColumns)
+          .capturePlanNodeId(nationScanId)
+          .hashJoin(
+              {"n_regionkey"},
+              {"r_regionkey"},
+              region,
+              "",
+              {"n_nationkey", "n_name"})
+          .planNode();
+
+  auto supplierJoinNationJoinRegion =
+      PlanBuilder(planNodeIdGenerator)
+          .tableScan(kSupplier, supplierSelectedRowType, supplierFileColumns)
+          .capturePlanNodeId(supplierScanId)
+          .hashJoin(
+              {"s_nationkey"},
+              {"n_nationkey"},
+              nationJoinRegion,
+              "",
+              // supplierColumns already contains s_suppkey, so only n_name
+              // has to be appended here.
+              mergeColumnNames(supplierColumns, {"n_name"}))
+          .planNode();
+
+  auto partsuppJoinPartJoinSupplierJoinNationJoinRegion =
+      PlanBuilder(planNodeIdGenerator)
+          .tableScan(kPartsupp, partsuppSelectedRowType, partsuppFileColumns)
+          .capturePlanNodeId(partsuppScanId)
+          .hashJoin(
+              {"ps_partkey"},
+              {"p_partkey"},
+              part,
+              "",
+              {"ps_suppkey", "ps_supplycost", "p_partkey", "p_mfgr"})
+          .hashJoin(
+              {"ps_suppkey"},
+              {"s_suppkey"},
+              supplierJoinNationJoinRegion,
+              "",
+              mergeColumnNames(
+                  supplierColumns,
+                  {"ps_supplycost", "p_partkey", "p_mfgr", "n_name"}))
+          .planNode();
+
+  // Aggregate the minimum supply cost per part, then keep only the main
+  // branch rows whose supply cost matches that minimum.
+  auto plan =
+      PlanBuilder(planNodeIdGenerator)
+          .tableScan(kPartsupp, partsuppSelectedRowType, partsuppFileColumns)
+          .capturePlanNodeId(partsuppScanIdSubQuery)
+          .hashJoin(
+              {"ps_suppkey"},
+              {"s_suppkey"},
+              supplierJoinNationJoinRegionSubQuery,
+              "",
+              {"ps_supplycost", "ps_partkey"})
+          .partialAggregation(
+              {"ps_partkey"}, {"min(ps_supplycost) AS min_supplycost"})
+          .localPartition({"ps_partkey"})
+          .finalAggregation()
+          .hashJoin(
+              {"ps_partkey"},
+              {"p_partkey"},
+              partsuppJoinPartJoinSupplierJoinNationJoinRegion,
+              "ps_supplycost = min_supplycost",
+              mergeColumnNames(
+                  supplierColumns, {"p_partkey", "p_mfgr", "n_name"}))
+          .orderBy({"s_acctbal DESC", "n_name", "s_name", "p_partkey"}, false)
+          .project(
+              {"s_acctbal",
+               "s_name",
+               "n_name",
+               "p_partkey",
+               "p_mfgr",
+               "s_address",
+               "s_phone",
+               "s_comment"})
+          .limit(0, 100, false)
+          .planNode();
+
+  TpchPlan context;
+  context.plan = std::move(plan);
+  // Attach the data files of each table to the scan node reading it.
+  context.dataFiles[supplierScanIdSubQuery] = getTableFilePaths(kSupplier);
+  context.dataFiles[partsuppScanIdSubQuery] = getTableFilePaths(kPartsupp);
+  context.dataFiles[nationScanIdSubQuery] = getTableFilePaths(kNation);
+  context.dataFiles[regionScanIdSubQuery] = getTableFilePaths(kRegion);
+  context.dataFiles[partScanId] = getTableFilePaths(kPart);
+  context.dataFiles[supplierScanId] = getTableFilePaths(kSupplier);
+  context.dataFiles[partsuppScanId] = getTableFilePaths(kPartsupp);
+  context.dataFiles[nationScanId] = getTableFilePaths(kNation);
+  context.dataFiles[regionScanId] = getTableFilePaths(kRegion);
+  context.dataFileFormat = format_;
+  return context;
+}
+
 TpchPlan TpchQueryBuilder::getQ3Plan() const {
   std::vector<std::string> lineitemColumns = {
       "l_shipdate", "l_orderkey", "l_extendedprice", "l_discount"};
diff --git a/velox/exec/tests/utils/TpchQueryBuilder.h b/velox/exec/tests/utils/TpchQueryBuilder.h
index 4f4227247ff0..c1ad1a970031 100644
--- a/velox/exec/tests/utils/TpchQueryBuilder.h
+++ b/velox/exec/tests/utils/TpchQueryBuilder.h
@@ -91,6 +91,7 @@ class TpchQueryBuilder {
       const std::vector<std::string>& columns);
 
   TpchPlan getQ1Plan() const;
+  TpchPlan getQ2Plan() const;
   TpchPlan getQ3Plan() const;
   TpchPlan getQ5Plan() const;
   TpchPlan getQ6Plan() const;