From 0fff3e20858e971649f03a3b444c9beb0231fe9a Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 19 Aug 2024 12:28:33 +0800 Subject: [PATCH] fix tests --- .../apache/gluten/utils/CHJoinValidateUtil.scala | 8 ++++++-- ...enClickHouseColumnarMemorySortShuffleSuite.scala | 2 +- .../GlutenClickHouseColumnarShuffleAQESuite.scala | 2 +- .../GlutenClickHouseDSV2ColumnarShuffleSuite.scala | 2 +- .../execution/GlutenClickHouseDSV2Suite.scala | 2 +- .../execution/GlutenClickHouseDecimalSuite.scala | 2 +- ...ClickHouseTPCHNullableColumnarShuffleSuite.scala | 2 +- .../GlutenClickHouseTPCHNullableSuite.scala | 2 +- .../execution/GlutenClickHouseTPCHSuite.scala | 2 +- ...ckHouseTPCDSParquetColumnarShuffleAQESuite.scala | 2 +- ...enClickHouseTPCDSParquetGraceHashJoinSuite.scala | 3 ++- ...enClickHouseTPCDSParquetSortMergeJoinSuite.scala | 5 +++-- ...ickHouseTPCHColumnarShuffleParquetAQESuite.scala | 2 +- ...tenClickHouseTPCHParquetAQEConcurrentSuite.scala | 2 -- .../tpch/GlutenClickHouseTPCHParquetAQESuite.scala | 2 +- .../GlutenClickHouseTPCHSaltNullParquetSuite.scala | 2 +- cpp-ch/local-engine/Common/QueryContext.cpp | 2 +- cpp-ch/local-engine/Parser/JoinRelParser.cpp | 13 +++++++------ ...lickHouseRSSColumnarMemorySortShuffleSuite.scala | 2 +- ...GlutenClickHouseRSSColumnarShuffleAQESuite.scala | 2 +- 20 files changed, 33 insertions(+), 28 deletions(-) diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHJoinValidateUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHJoinValidateUtil.scala index aabdae0951a7..b86482720c7c 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHJoinValidateUtil.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHJoinValidateUtil.scala @@ -54,8 +54,12 @@ object CHJoinValidateUtil extends Logging { condition.isDefined && hasTwoTableColumn(leftOutputSet, rightOutputSet, condition.get) val shouldFallback = joinStrategy match { case SortMergeJoinStrategy(joinType) => 
- joinType.sql.contains("SEMI") || joinType.sql.contains("ANTI") || joinType.toString - .contains("ExistenceJoin") || hasMixedFilterCondition + if (!joinType.isInstanceOf[ExistenceJoin] && joinType.sql.contains("INNER")) { + false + } else { + joinType.sql.contains("SEMI") || joinType.sql.contains("ANTI") || joinType.toString + .contains("ExistenceJoin") || hasMixedFilterCondition + } case UnknownJoinStrategy(joinType) => throw new IllegalArgumentException(s"Unknown join type $joinStrategy") case _ => false diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseColumnarMemorySortShuffleSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseColumnarMemorySortShuffleSuite.scala index b9d580c7249c..4c49cc2d9f46 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseColumnarMemorySortShuffleSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseColumnarMemorySortShuffleSuite.scala @@ -119,7 +119,7 @@ class GlutenClickHouseColumnarMemorySortShuffleSuite } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") { diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseColumnarShuffleAQESuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseColumnarShuffleAQESuite.scala index 10e5c7534d35..e5da78de3fd6 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseColumnarShuffleAQESuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseColumnarShuffleAQESuite.scala @@ -163,7 +163,7 @@ class GlutenClickHouseColumnarShuffleAQESuite } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") { diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDSV2ColumnarShuffleSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDSV2ColumnarShuffleSuite.scala index dd997832d3e3..17bd9912b032 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDSV2ColumnarShuffleSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDSV2ColumnarShuffleSuite.scala @@ -168,7 +168,7 @@ class GlutenClickHouseDSV2ColumnarShuffleSuite extends GlutenClickHouseTPCHAbstr } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") { diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDSV2Suite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDSV2Suite.scala index 08393ccfe774..a58b6b1c1292 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDSV2Suite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDSV2Suite.scala @@ -126,7 +126,7 @@ class GlutenClickHouseDSV2Suite extends GlutenClickHouseTPCHAbstractSuite { } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") { diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDecimalSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDecimalSuite.scala index cf1bdd296c01..bd831e64bf38 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDecimalSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDecimalSuite.scala @@ -343,7 +343,7 @@ class GlutenClickHouseDecimalSuite decimalTPCHTables.foreach { dt => { - val fallBack = (sql_num == 16 || sql_num == 21) + val fallBack = 
(sql_num == 16) val compareResult = !dt._2.contains(sql_num) val native = if (fallBack) "fallback" else "native" val compare = if (compareResult) "compare" else "noCompare" diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableColumnarShuffleSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableColumnarShuffleSuite.scala index c5f67f45d577..5f9aa0dbda60 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableColumnarShuffleSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableColumnarShuffleSuite.scala @@ -171,7 +171,7 @@ class GlutenClickHouseTPCHNullableColumnarShuffleSuite extends GlutenClickHouseT } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") { diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala index 7f62c6993157..e0e4d3380450 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala @@ -174,7 +174,7 @@ class GlutenClickHouseTPCHNullableSuite extends GlutenClickHouseTPCHAbstractSuit } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") { diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala index 1c09449c817f..f25a1313255f 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala +++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSuite.scala @@ -175,7 +175,7 @@ class GlutenClickHouseTPCHSuite extends GlutenClickHouseTPCHAbstractSuite { } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") { diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetColumnarShuffleAQESuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetColumnarShuffleAQESuite.scala index 1fd8983f5876..3e1507bf17aa 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetColumnarShuffleAQESuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetColumnarShuffleAQESuite.scala @@ -239,6 +239,6 @@ class GlutenClickHouseTPCDSParquetColumnarShuffleAQESuite | LIMIT 100 ; |""".stripMargin // There are some BroadcastHashJoin with NOT condition - compareResultsAgainstVanillaSpark(sql, true, { df => }, false) + compareResultsAgainstVanillaSpark(sql, true, { df => }) } } diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetGraceHashJoinSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetGraceHashJoinSuite.scala index 716ea5761d2d..4b4a0d34e89a 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetGraceHashJoinSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetGraceHashJoinSuite.scala @@ -31,9 +31,10 @@ class GlutenClickHouseTPCDSParquetGraceHashJoinSuite extends GlutenClickHouseTPC .set("spark.io.compression.codec", "snappy") .set("spark.sql.shuffle.partitions", "5") .set("spark.sql.autoBroadcastJoinThreshold", "10MB") - 
.set("spark.memory.offHeap.size", "8g") + .set("spark.memory.offHeap.size", "6g") .set("spark.gluten.sql.columnar.backend.ch.runtime_settings.join_algorithm", "grace_hash") .set("spark.gluten.sql.columnar.backend.ch.runtime_settings.max_bytes_in_join", "314572800") + .setMaster("local[2]") } executeTPCDSTest(false) diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetSortMergeJoinSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetSortMergeJoinSuite.scala index 509c830545c6..41436da70dad 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetSortMergeJoinSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpcds/GlutenClickHouseTPCDSParquetSortMergeJoinSuite.scala @@ -49,9 +49,10 @@ class GlutenClickHouseTPCDSParquetSortMergeJoinSuite extends GlutenClickHouseTPC .set("spark.shuffle.manager", "sort") .set("spark.io.compression.codec", "snappy") .set("spark.sql.shuffle.partitions", "5") - .set("spark.sql.autoBroadcastJoinThreshold", "10MB") - .set("spark.memory.offHeap.size", "8g") + .set("spark.sql.autoBroadcastJoinThreshold", "-1") + .set("spark.memory.offHeap.size", "6g") .set("spark.gluten.sql.columnar.forceShuffledHashJoin", "false") + .setMaster("local[2]") } executeTPCDSTest(false) diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHColumnarShuffleParquetAQESuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHColumnarShuffleParquetAQESuite.scala index c2e2f9f5565f..39dc7baf9695 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHColumnarShuffleParquetAQESuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHColumnarShuffleParquetAQESuite.scala @@ -275,7 +275,7 @@ class 
GlutenClickHouseTPCHColumnarShuffleParquetAQESuite } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { + runTPCHQuery(21) { df => val plans = collect(df.queryExecution.executedPlan) { case scanExec: BasicScanExecTransformer => scanExec diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHParquetAQEConcurrentSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHParquetAQEConcurrentSuite.scala index 8c706f683639..e1dd33912bd7 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHParquetAQEConcurrentSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHParquetAQEConcurrentSuite.scala @@ -45,7 +45,6 @@ class GlutenClickHouseTPCHParquetAQEConcurrentSuite .set("spark.shuffle.manager", "sort") .set("spark.io.compression.codec", "snappy") .set("spark.sql.shuffle.partitions", "5") - .set("spark.sql.autoBroadcastJoinThreshold", "10MB") .set("spark.sql.adaptive.enabled", "true") .set("spark.sql.autoBroadcastJoinThreshold", "-1") } @@ -82,5 +81,4 @@ class GlutenClickHouseTPCHParquetAQEConcurrentSuite queries.map(queryId => runTPCHQuery(queryId) { df => }) } - } diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHParquetAQESuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHParquetAQESuite.scala index 1d8389b48143..2aadac05d348 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHParquetAQESuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHParquetAQESuite.scala @@ -209,7 +209,7 @@ class GlutenClickHouseTPCHParquetAQESuite } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") { diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala index 0efc1414ce33..e21df203dac0 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala @@ -335,7 +335,7 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("GLUTEN-2115: Fix wrong number of records shuffle written") { diff --git a/cpp-ch/local-engine/Common/QueryContext.cpp b/cpp-ch/local-engine/Common/QueryContext.cpp index ff9c151159a6..0abff2fc143d 100644 --- a/cpp-ch/local-engine/Common/QueryContext.cpp +++ b/cpp-ch/local-engine/Common/QueryContext.cpp @@ -172,4 +172,4 @@ double currentThreadGroupMemoryUsageRatio() } return static_cast<double>(CurrentThread::getGroup()->memory_tracker.get()) / CurrentThread::getGroup()->memory_tracker.getSoftLimit(); } -} \ No newline at end of file +} diff --git a/cpp-ch/local-engine/Parser/JoinRelParser.cpp b/cpp-ch/local-engine/Parser/JoinRelParser.cpp index 0446a397c008..99270348ce05 100644 --- a/cpp-ch/local-engine/Parser/JoinRelParser.cpp +++ b/cpp-ch/local-engine/Parser/JoinRelParser.cpp @@ -681,14 +681,14 @@ bool JoinRelParser::couldRewriteToMultiJoinOnClauses( auto optional_keys = parse_join_keys(current_expr); if (!optional_keys) { - LOG_ERROR(getLogger("JoinRelParser"), "Not equal comparison for keys from both tables"); + LOG_INFO(getLogger("JoinRelParser"), "Not equal comparison for keys from both tables"); return false; } join_on_clause.addKey(optional_keys->first, optional_keys->second, false); } else { - LOG_ERROR(getLogger("JoinRelParser"), "And or equals function 
is expected"); + LOG_INFO(getLogger("JoinRelParser"), "And or equals function is expected"); return false; } } @@ -701,7 +701,8 @@ bool JoinRelParser::couldRewriteToMultiJoinOnClauses( expression_stack.pop_back(); if (!check_function("or", current_expr)) { - LOG_ERROR(getLogger("JoinRelParser"), "Not an or expression"); + LOG_INFO(getLogger("JoinRelParser"), "Not an or expression"); + return false; } auto get_current_join_on_clause = [&]() @@ -719,7 +720,7 @@ bool JoinRelParser::couldRewriteToMultiJoinOnClauses( auto optional_keys = parse_join_keys(arg.value()); if (!optional_keys) { - LOG_ERROR(getLogger("JoinRelParser"), "Not equal comparison for keys from both tables"); + LOG_INFO(getLogger("JoinRelParser"), "Not equal comparison for keys from both tables"); return false; } get_current_join_on_clause()->addKey(optional_keys->first, optional_keys->second, false); @@ -728,7 +729,7 @@ bool JoinRelParser::couldRewriteToMultiJoinOnClauses( { if (!parse_and_expression(arg.value(), *get_current_join_on_clause())) { - LOG_ERROR(getLogger("JoinRelParser"), "Parse and expression failed"); + LOG_INFO(getLogger("JoinRelParser"), "Parse and expression failed"); return false; } } @@ -738,7 +739,7 @@ bool JoinRelParser::couldRewriteToMultiJoinOnClauses( } else { - LOG_ERROR(getLogger("JoinRelParser"), "Unknow function"); + LOG_INFO(getLogger("JoinRelParser"), "Unknow function"); return false; } } diff --git a/gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarMemorySortShuffleSuite.scala b/gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarMemorySortShuffleSuite.scala index ddef1d87cd08..ef1c4180b11b 100644 --- a/gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarMemorySortShuffleSuite.scala +++ b/gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarMemorySortShuffleSuite.scala @@ -127,7 +127,7 @@ 
class GlutenClickHouseRSSColumnarMemorySortShuffleSuite } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") { diff --git a/gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarShuffleAQESuite.scala b/gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarShuffleAQESuite.scala index c3d71ba53088..50220264e063 100644 --- a/gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarShuffleAQESuite.scala +++ b/gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarShuffleAQESuite.scala @@ -162,7 +162,7 @@ class GlutenClickHouseRSSColumnarShuffleAQESuite } test("TPCH Q21") { - runTPCHQuery(21, noFallBack = false) { df => } + runTPCHQuery(21) { df => } } test("TPCH Q22") {