Skip to content

Commit

Permalink
Add expression blacklist
Browse files Browse the repository at this point in the history
  • Loading branch information
ulysses-you committed Dec 12, 2023
1 parent ccf95f5 commit 752f79d
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package io.glutenproject.expression

import io.glutenproject.GlutenConfig
import io.glutenproject.backendsapi.BackendsApiManager
import io.glutenproject.expression.ExpressionNames._
import io.glutenproject.extension.ExpressionExtensionTrait
Expand Down Expand Up @@ -282,9 +283,11 @@ object ExpressionMappings {
expressionExtensionTransformer.extensionExpressionsMapping

/**
 * Expression class -> native function name, assembled from the scalar, aggregate and window
 * signatures plus any extra mappings contributed by the active backend. Signatures whose name
 * appears in the configured expression blacklist are left out.
 */
private lazy val defaultExpressionsMap: Map[Class[_], String] = {
  val excludedNames = GlutenConfig.getConf.expressionBacklist
  val allSigs = SCALAR_SIGS ++ AGGREGATE_SIGS ++ WINDOW_SIGS ++
    BackendsApiManager.getSparkPlanExecApiInstance.extraExpressionMappings
  allSigs
    // Drop blacklisted signatures first; matching is on the exact signature name.
    .filterNot(sig => excludedNames.contains(sig.name))
    .map(sig => (sig.expClass, sig.name))
    .toMap[Class[_], String]
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1797,6 +1797,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
"SELECT structFieldSimple.key, arrayFieldSimple[1] FROM tableWithSchema a where int_Field=1")
.exclude("SELECT structFieldComplex.Value.`value_(2)` FROM tableWithSchema")
enableSuite[SparkFunctionStatistics]
enableSuite[GlutenExpressionMappingSuite]

override def getSQLQueryTestSettings: SQLQueryTestSettings = ClickHouseSQLQueryTestSettings
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package io.glutenproject.utils.velox
import io.glutenproject.utils.{BackendTestSettings, SQLQueryTestSettings}

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.expressions.{GlutenArithmeticExpressionSuite, GlutenBitwiseExpressionsSuite, GlutenCastSuite, GlutenCollectionExpressionsSuite, GlutenComplexTypeSuite, GlutenConditionalExpressionSuite, GlutenDateExpressionsSuite, GlutenDecimalExpressionSuite, GlutenHashExpressionsSuite, GlutenIntervalExpressionsSuite, GlutenLiteralExpressionSuite, GlutenMathExpressionsSuite, GlutenMiscExpressionsSuite, GlutenNondeterministicSuite, GlutenNullExpressionsSuite, GlutenPredicateSuite, GlutenRandomSuite, GlutenRegexpExpressionsSuite, GlutenSortOrderExpressionsSuite, GlutenStringExpressionsSuite}
import org.apache.spark.sql.catalyst.expressions.{GlutenArithmeticExpressionSuite, GlutenBitwiseExpressionsSuite, GlutenCastSuite, GlutenCollectionExpressionsSuite, GlutenComplexTypeSuite, GlutenConditionalExpressionSuite, GlutenDateExpressionsSuite, GlutenDecimalExpressionSuite, GlutenExpressionMappingSuite, GlutenHashExpressionsSuite, GlutenIntervalExpressionsSuite, GlutenLiteralExpressionSuite, GlutenMathExpressionsSuite, GlutenMiscExpressionsSuite, GlutenNondeterministicSuite, GlutenNullExpressionsSuite, GlutenPredicateSuite, GlutenRandomSuite, GlutenRegexpExpressionsSuite, GlutenSortOrderExpressionsSuite, GlutenStringExpressionsSuite}
import org.apache.spark.sql.connector.{GlutenDataSourceV2DataFrameSessionCatalogSuite, GlutenDataSourceV2DataFrameSuite, GlutenDataSourceV2FunctionSuite, GlutenDataSourceV2SQLSessionCatalogSuite, GlutenDataSourceV2SQLSuiteV1Filter, GlutenDataSourceV2SQLSuiteV2Filter, GlutenDataSourceV2Suite, GlutenDeleteFromTableSuite, GlutenFileDataSourceV2FallBackSuite, GlutenKeyGroupedPartitioningSuite, GlutenLocalScanSuite, GlutenMetadataColumnSuite, GlutenSupportsCatalogOptionsSuite, GlutenTableCapabilityCheckSuite, GlutenWriteDistributionAndOrderingSuite}
import org.apache.spark.sql.errors.{GlutenQueryCompilationErrorsDSv2Suite, GlutenQueryCompilationErrorsSuite, GlutenQueryExecutionErrorsSuite, GlutenQueryParsingErrorsSuite}
import org.apache.spark.sql.execution.{FallbackStrategiesSuite, GlutenBroadcastExchangeSuite, GlutenCoalesceShufflePartitionsSuite, GlutenExchangeSuite, GlutenReplaceHashWithSortAggSuite, GlutenReuseExchangeAndSubquerySuite, GlutenSameResultSuite, GlutenSortSuite, GlutenSQLWindowFunctionSuite, GlutenTakeOrderedAndProjectSuite}
Expand Down Expand Up @@ -1200,6 +1200,7 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenFallbackSuite]
enableSuite[GlutenHiveSQLQuerySuite]
enableSuite[GlutenCollapseProjectExecTransformerSuite]
enableSuite[GlutenExpressionMappingSuite]

override def getSQLQueryTestSettings: SQLQueryTestSettings = VeloxSQLQueryTestSettings
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.catalyst.expressions

import io.glutenproject.GlutenConfig
import io.glutenproject.execution.ProjectExecTransformer
import io.glutenproject.expression.ExpressionMappings

import org.apache.spark.SparkConf
import org.apache.spark.sql.{GlutenSQLTestsTrait, Row}
import org.apache.spark.sql.execution.ProjectExec
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper

/**
 * Checks that expression names listed in the Gluten expression blacklist are removed from
 * [[ExpressionMappings.expressionsMap]] and that a query using such an expression is planned
 * with a vanilla `ProjectExec` instead of a `ProjectExecTransformer`.
 */
class GlutenExpressionMappingSuite extends GlutenSQLTestsTrait with AdaptiveSparkPlanHelper {

  // The blacklist is supplied through the session's SparkConf, so it is in place before the
  // session used by the tests is created.
  override def sparkConf: SparkConf = {
    super.sparkConf
      .set(GlutenConfig.EXPRESSION_BLACK_LIST.key, "regexp_replace,regexp_extract,add")
  }

  test("test expression blacklist") {
    val names = ExpressionMappings.expressionsMap.values.toSet
    assert(!names.contains("regexp_replace"))
    assert(!names.contains("regexp_extract"))
    // A name that merely shares a prefix with a blacklisted one must still be mapped.
    assert(names.contains("regexp_extract_all"))
    assert(!names.contains("add"))

    // Create the table inside withTable so that it is dropped even if the CTAS itself or any
    // later assertion fails.
    withTable("t") {
      spark.sql("CREATE TABLE t USING PARQUET AS SELECT 1 as c")
      val df = spark.sql("SELECT c + 1 FROM t")
      checkAnswer(df, Row(2))
      // With "add" blacklisted the projection is expected to stay on the vanilla operator.
      val plan = df.queryExecution.executedPlan
      assert(find(plan)(_.isInstanceOf[ProjectExecTransformer]).isEmpty)
      assert(find(plan)(_.isInstanceOf[ProjectExec]).isDefined)
    }
  }
}
15 changes: 15 additions & 0 deletions shims/common/src/main/scala/io/glutenproject/GlutenConfig.scala
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,15 @@ class GlutenConfig(conf: SQLConf) extends Logging {

/** Returns the value of the `EXTENDED_EXPRESSION_TRAN_CONF` entry read from the wrapped conf. */
def extendedExpressionTransformer: String = conf.getConf(EXTENDED_EXPRESSION_TRAN_CONF)

/**
 * Expression names that must not be transformed, parsed from the optional
 * `EXPRESSION_BLACK_LIST` entry.
 *
 * The configured value is lower-cased and split on commas; each element is trimmed and empty
 * elements are dropped, so values such as `"add, regexp_replace"` or `""` behave as expected.
 *
 * @return the set of lower-cased names, or an empty set when the entry is unset
 */
def expressionBacklist: Set[String] = {
  conf
    .getConf(EXPRESSION_BLACK_LIST)
    .map {
      raw =>
        raw
          .toLowerCase(Locale.ROOT)
          .split(",")
          // Trim per element: trimming only the whole string leaves " name" entries behind.
          .map(_.trim)
          // "".split(",") yields a single empty string; do not treat it as a name.
          .filter(_.nonEmpty)
          .toSet
    }
    .getOrElse(Set.empty[String])
}

/** Returns the value of the `VALIDATION_PRINT_FAILURE_STACK_` entry read from the wrapped conf. */
def printStackOnValidationFailure: Boolean =
conf.getConf(VALIDATION_PRINT_FAILURE_STACK_)

Expand Down Expand Up @@ -1252,6 +1261,12 @@ object GlutenConfig {
.stringConf
.createWithDefaultString("")

// Static, optional config holding the expression blacklist.
// NOTE: the key is spelled "backlist"; it is kept as-is so existing configurations keep working.
val EXPRESSION_BLACK_LIST =
  buildStaticConf("spark.gluten.expression.backlist")
    .doc("A comma-separated blacklist of expression names that should not be transformed.")
    .stringConf
    .createOptional

val FALLBACK_REPORTER_ENABLED =
buildConf("spark.gluten.sql.columnar.fallbackReporter")
.doc("When true, enable fallback reporter rule to print fallback reason")
Expand Down

0 comments on commit 752f79d

Please sign in to comment.