From 752f79d245ad3169631967c6c2a29e997269da78 Mon Sep 17 00:00:00 2001
From: ulysses-you
Date: Tue, 12 Dec 2023 11:26:32 +0800
Subject: [PATCH] Add expression blacklist

---
 .../expression/ExpressionMappings.scala       |  3 +++
 .../clickhouse/ClickHouseTestSettings.scala   |  1 +
 .../utils/velox/VeloxTestSettings.scala       |  3 ++-
 .../GlutenExpressionMappingSuite.scala        | 50 +++++++++++++++++++
 .../scala/io/glutenproject/GlutenConfig.scala | 15 ++++++
 5 files changed, 71 insertions(+), 1 deletion(-)
 create mode 100644 gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenExpressionMappingSuite.scala

diff --git a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionMappings.scala b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionMappings.scala
index 705ec28bf3641..48c8c7d943eca 100644
--- a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionMappings.scala
+++ b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionMappings.scala
@@ -16,6 +16,7 @@
  */
 package io.glutenproject.expression
 
+import io.glutenproject.GlutenConfig
 import io.glutenproject.backendsapi.BackendsApiManager
 import io.glutenproject.expression.ExpressionNames._
 import io.glutenproject.extension.ExpressionExtensionTrait
@@ -282,9 +283,11 @@ object ExpressionMappings {
     expressionExtensionTransformer.extensionExpressionsMapping
 
   private lazy val defaultExpressionsMap: Map[Class[_], String] = {
+    val blacklist = GlutenConfig.getConf.expressionBlacklist
     (SCALAR_SIGS ++ AGGREGATE_SIGS ++ WINDOW_SIGS ++
       BackendsApiManager.getSparkPlanExecApiInstance.extraExpressionMappings)
       .map(s => (s.expClass, s.name))
+      .filterNot(kv => blacklist.contains(kv._2))
       .toMap[Class[_], String]
   }
 
diff --git a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
index 2a26440eba3b7..a51e7a4249807 100644
--- a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/clickhouse/ClickHouseTestSettings.scala
@@ -1797,6 +1797,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
         "SELECT structFieldSimple.key, arrayFieldSimple[1] FROM tableWithSchema a where int_Field=1")
       .exclude("SELECT structFieldComplex.Value.`value_(2)` FROM tableWithSchema")
   enableSuite[SparkFunctionStatistics]
+  enableSuite[GlutenExpressionMappingSuite]
 
   override def getSQLQueryTestSettings: SQLQueryTestSettings = ClickHouseSQLQueryTestSettings
 }
diff --git a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala
index 48a162468460c..9948393609ca4 100644
--- a/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark34/src/test/scala/io/glutenproject/utils/velox/VeloxTestSettings.scala
@@ -19,7 +19,7 @@ package io.glutenproject.utils.velox
 import io.glutenproject.utils.{BackendTestSettings, SQLQueryTestSettings}
 
 import org.apache.spark.sql._
-import org.apache.spark.sql.catalyst.expressions.{GlutenArithmeticExpressionSuite, GlutenBitwiseExpressionsSuite, GlutenCastSuite, GlutenCollectionExpressionsSuite, GlutenComplexTypeSuite, GlutenConditionalExpressionSuite, GlutenDateExpressionsSuite, GlutenDecimalExpressionSuite, GlutenHashExpressionsSuite, GlutenIntervalExpressionsSuite, GlutenLiteralExpressionSuite, GlutenMathExpressionsSuite, GlutenMiscExpressionsSuite, GlutenNondeterministicSuite, GlutenNullExpressionsSuite, GlutenPredicateSuite, GlutenRandomSuite, GlutenRegexpExpressionsSuite, GlutenSortOrderExpressionsSuite, GlutenStringExpressionsSuite}
+import org.apache.spark.sql.catalyst.expressions.{GlutenArithmeticExpressionSuite, GlutenBitwiseExpressionsSuite, GlutenCastSuite, GlutenCollectionExpressionsSuite, GlutenComplexTypeSuite, GlutenConditionalExpressionSuite, GlutenDateExpressionsSuite, GlutenDecimalExpressionSuite, GlutenExpressionMappingSuite, GlutenHashExpressionsSuite, GlutenIntervalExpressionsSuite, GlutenLiteralExpressionSuite, GlutenMathExpressionsSuite, GlutenMiscExpressionsSuite, GlutenNondeterministicSuite, GlutenNullExpressionsSuite, GlutenPredicateSuite, GlutenRandomSuite, GlutenRegexpExpressionsSuite, GlutenSortOrderExpressionsSuite, GlutenStringExpressionsSuite}
 import org.apache.spark.sql.connector.{GlutenDataSourceV2DataFrameSessionCatalogSuite, GlutenDataSourceV2DataFrameSuite, GlutenDataSourceV2FunctionSuite, GlutenDataSourceV2SQLSessionCatalogSuite, GlutenDataSourceV2SQLSuiteV1Filter, GlutenDataSourceV2SQLSuiteV2Filter, GlutenDataSourceV2Suite, GlutenDeleteFromTableSuite, GlutenFileDataSourceV2FallBackSuite, GlutenKeyGroupedPartitioningSuite, GlutenLocalScanSuite, GlutenMetadataColumnSuite, GlutenSupportsCatalogOptionsSuite, GlutenTableCapabilityCheckSuite, GlutenWriteDistributionAndOrderingSuite}
 import org.apache.spark.sql.errors.{GlutenQueryCompilationErrorsDSv2Suite, GlutenQueryCompilationErrorsSuite, GlutenQueryExecutionErrorsSuite, GlutenQueryParsingErrorsSuite}
 import org.apache.spark.sql.execution.{FallbackStrategiesSuite, GlutenBroadcastExchangeSuite, GlutenCoalesceShufflePartitionsSuite, GlutenExchangeSuite, GlutenReplaceHashWithSortAggSuite, GlutenReuseExchangeAndSubquerySuite, GlutenSameResultSuite, GlutenSortSuite, GlutenSQLWindowFunctionSuite, GlutenTakeOrderedAndProjectSuite}
@@ -1200,6 +1200,7 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenFallbackSuite]
   enableSuite[GlutenHiveSQLQuerySuite]
   enableSuite[GlutenCollapseProjectExecTransformerSuite]
+  enableSuite[GlutenExpressionMappingSuite]
 
   override def getSQLQueryTestSettings: SQLQueryTestSettings = VeloxSQLQueryTestSettings
 }
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenExpressionMappingSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenExpressionMappingSuite.scala
new file mode 100644
index 0000000000000..683f3de90f104
--- /dev/null
+++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenExpressionMappingSuite.scala
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.catalyst.expressions
+
+import io.glutenproject.GlutenConfig
+import io.glutenproject.execution.ProjectExecTransformer
+import io.glutenproject.expression.ExpressionMappings
+
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.{GlutenSQLTestsTrait, Row}
+import org.apache.spark.sql.execution.ProjectExec
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
+
+class GlutenExpressionMappingSuite extends GlutenSQLTestsTrait with AdaptiveSparkPlanHelper {
+
+  override def sparkConf: SparkConf = {
+    super.sparkConf
+      .set(GlutenConfig.EXPRESSION_BLACK_LIST.key, "regexp_replace,regexp_extract,add")
+  }
+
+  test("test expression blacklist") {
+    val names = ExpressionMappings.expressionsMap.values.toSet
+    assert(!names.contains("regexp_replace"))
+    assert(!names.contains("regexp_extract"))
+    assert(names.contains("regexp_extract_all"))
+    assert(!names.contains("add"))
+
+    spark.sql("CREATE TABLE t USING PARQUET AS SELECT 1 as c")
+    withTable("t") {
+      val df = spark.sql("SELECT c + 1 FROM t")
+      checkAnswer(df, Row(2))
+      assert(find(df.queryExecution.executedPlan)(_.isInstanceOf[ProjectExecTransformer]).isEmpty)
+      assert(find(df.queryExecution.executedPlan)(_.isInstanceOf[ProjectExec]).isDefined)
+    }
+  }
+}
diff --git a/shims/common/src/main/scala/io/glutenproject/GlutenConfig.scala b/shims/common/src/main/scala/io/glutenproject/GlutenConfig.scala
index 27bd9389d4a5c..70bbd91d19a43 100644
--- a/shims/common/src/main/scala/io/glutenproject/GlutenConfig.scala
+++ b/shims/common/src/main/scala/io/glutenproject/GlutenConfig.scala
@@ -262,6 +262,15 @@ class GlutenConfig(conf: SQLConf) extends Logging {
 
   def extendedExpressionTransformer: String = conf.getConf(EXTENDED_EXPRESSION_TRAN_CONF)
 
+  def expressionBlacklist: Set[String] = {
+    val blacklist = conf.getConf(EXPRESSION_BLACK_LIST)
+    if (blacklist.isDefined) {
+      blacklist.get.toLowerCase(Locale.ROOT).split(",").map(_.trim).toSet
+    } else {
+      Set.empty
+    }
+  }
+
   def printStackOnValidationFailure: Boolean =
     conf.getConf(VALIDATION_PRINT_FAILURE_STACK_)
 
@@ -1252,6 +1261,12 @@ object GlutenConfig {
       .stringConf
       .createWithDefaultString("")
 
+  val EXPRESSION_BLACK_LIST =
+    buildStaticConf("spark.gluten.expression.blacklist")
+      .doc("A black list of expression names to skip transforming, separated by commas.")
+      .stringConf
+      .createOptional
+
   val FALLBACK_REPORTER_ENABLED =
     buildConf("spark.gluten.sql.columnar.fallbackReporter")
      .doc("When true, enable fallback reporter rule to print fallback reason")
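
Usage note (not part of the patch): a minimal sketch of how an application might exercise the new option, assuming it ships under the key spark.gluten.expression.blacklist introduced above and that Gluten itself (plugin, off-heap memory, etc.) is already configured. The object and application names below are illustrative only; the expected fallback to vanilla ProjectExec mirrors what GlutenExpressionMappingSuite asserts.

import org.apache.spark.sql.SparkSession

object ExpressionBlacklistExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName("gluten-expression-blacklist-example")
      // Static conf: it must be set before the session (and Gluten) is initialized,
      // because ExpressionMappings builds its expression map lazily but globally.
      .config("spark.gluten.expression.blacklist", "regexp_replace")
      .getOrCreate()

    // With regexp_replace removed from ExpressionMappings, the projection that
    // uses it is expected to fall back to Spark's row-based ProjectExec.
    val df = spark.sql("SELECT regexp_replace('a1b2', '[0-9]', '') AS cleaned")
    df.explain()
    df.show()

    spark.stop()
  }
}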