Skip to content

Commit

Permalink
Add expression blacklist
Browse files Browse the repository at this point in the history
  • Loading branch information
ulysses-you committed Dec 12, 2023
1 parent ccf95f5 commit 08c10c0
Show file tree
Hide file tree
Showing 9 changed files with 268 additions and 147 deletions.
97 changes: 49 additions & 48 deletions docs/Configuration.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -213,10 +213,8 @@ case class ProjectExecTransformer private (projectList: Seq[NamedExpression], ch
input: RelNode,
validation: Boolean): RelNode = {
val args = context.registeredFunction
val columnarProjExprs: Seq[ExpressionTransformer] = projectList.map(
expr =>
ExpressionConverter
.replaceWithExpressionTransformer(expr, attributeSeq = originalInputAttributes))
val columnarProjExprs: Seq[ExpressionTransformer] = ExpressionConverter
.replaceWithExpressionTransformer(projectList, attributeSeq = originalInputAttributes)
val projExprNodeList = columnarProjExprs.map(_.doTransform(args)).asJava
val emitStartIndex = originalInputAttributes.size
if (!validation) {
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
*/
package io.glutenproject.expression

import io.glutenproject.GlutenConfig
import io.glutenproject.backendsapi.BackendsApiManager
import io.glutenproject.expression.ExpressionNames._
import io.glutenproject.extension.ExpressionExtensionTrait
Expand Down Expand Up @@ -277,9 +278,16 @@ object ExpressionMappings {
Sig[NthValue](NTH_VALUE)
)

def expressionsMap: Map[Class[_], String] =
defaultExpressionsMap ++
/**
 * The expression-class to expression-name mapping currently in effect: the default mapping
 * plus the extension mapping, minus every entry whose name appears in the configured
 * expression blacklist. Evaluated on each call, so the blacklist is re-read every time.
 */
def expressionsMap: Map[Class[_], String] = {
  val excludedNames = GlutenConfig.getConf.expressionBlacklist
  val allMappings =
    defaultExpressionsMap ++ expressionExtensionTransformer.extensionExpressionsMapping
  if (excludedNames.nonEmpty) {
    // Drop entries by exact name match against the blacklist.
    allMappings.filterNot { case (_, exprName) => excludedNames.contains(exprName) }
  } else {
    allMappings
  }
}

private lazy val defaultExpressionsMap: Map[Class[_], String] = {
(SCALAR_SIGS ++ AGGREGATE_SIGS ++ WINDOW_SIGS ++
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ object HiveSimpleUDFTransformer {
case Some(name) =>
GenericExpressionTransformer(
name,
udf.children.map(ExpressionConverter.replaceWithExpressionTransformer(_, attributeSeq)),
ExpressionConverter.replaceWithExpressionTransformer(udf.children, attributeSeq),
udf)
case _ =>
throw new UnsupportedOperationException(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1797,6 +1797,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
"SELECT structFieldSimple.key, arrayFieldSimple[1] FROM tableWithSchema a where int_Field=1")
.exclude("SELECT structFieldComplex.Value.`value_(2)` FROM tableWithSchema")
enableSuite[SparkFunctionStatistics]
enableSuite[GlutenExpressionMappingSuite]

override def getSQLQueryTestSettings: SQLQueryTestSettings = ClickHouseSQLQueryTestSettings
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ package io.glutenproject.utils.velox
import io.glutenproject.utils.{BackendTestSettings, SQLQueryTestSettings}

import org.apache.spark.sql._
import org.apache.spark.sql.catalyst.expressions.{GlutenArithmeticExpressionSuite, GlutenBitwiseExpressionsSuite, GlutenCastSuite, GlutenCollectionExpressionsSuite, GlutenComplexTypeSuite, GlutenConditionalExpressionSuite, GlutenDateExpressionsSuite, GlutenDecimalExpressionSuite, GlutenHashExpressionsSuite, GlutenIntervalExpressionsSuite, GlutenLiteralExpressionSuite, GlutenMathExpressionsSuite, GlutenMiscExpressionsSuite, GlutenNondeterministicSuite, GlutenNullExpressionsSuite, GlutenPredicateSuite, GlutenRandomSuite, GlutenRegexpExpressionsSuite, GlutenSortOrderExpressionsSuite, GlutenStringExpressionsSuite}
import org.apache.spark.sql.catalyst.expressions.{GlutenArithmeticExpressionSuite, GlutenBitwiseExpressionsSuite, GlutenCastSuite, GlutenCollectionExpressionsSuite, GlutenComplexTypeSuite, GlutenConditionalExpressionSuite, GlutenDateExpressionsSuite, GlutenDecimalExpressionSuite, GlutenExpressionMappingSuite, GlutenHashExpressionsSuite, GlutenIntervalExpressionsSuite, GlutenLiteralExpressionSuite, GlutenMathExpressionsSuite, GlutenMiscExpressionsSuite, GlutenNondeterministicSuite, GlutenNullExpressionsSuite, GlutenPredicateSuite, GlutenRandomSuite, GlutenRegexpExpressionsSuite, GlutenSortOrderExpressionsSuite, GlutenStringExpressionsSuite}
import org.apache.spark.sql.connector.{GlutenDataSourceV2DataFrameSessionCatalogSuite, GlutenDataSourceV2DataFrameSuite, GlutenDataSourceV2FunctionSuite, GlutenDataSourceV2SQLSessionCatalogSuite, GlutenDataSourceV2SQLSuiteV1Filter, GlutenDataSourceV2SQLSuiteV2Filter, GlutenDataSourceV2Suite, GlutenDeleteFromTableSuite, GlutenFileDataSourceV2FallBackSuite, GlutenKeyGroupedPartitioningSuite, GlutenLocalScanSuite, GlutenMetadataColumnSuite, GlutenSupportsCatalogOptionsSuite, GlutenTableCapabilityCheckSuite, GlutenWriteDistributionAndOrderingSuite}
import org.apache.spark.sql.errors.{GlutenQueryCompilationErrorsDSv2Suite, GlutenQueryCompilationErrorsSuite, GlutenQueryExecutionErrorsSuite, GlutenQueryParsingErrorsSuite}
import org.apache.spark.sql.execution.{FallbackStrategiesSuite, GlutenBroadcastExchangeSuite, GlutenCoalesceShufflePartitionsSuite, GlutenExchangeSuite, GlutenReplaceHashWithSortAggSuite, GlutenReuseExchangeAndSubquerySuite, GlutenSameResultSuite, GlutenSortSuite, GlutenSQLWindowFunctionSuite, GlutenTakeOrderedAndProjectSuite}
Expand Down Expand Up @@ -1200,6 +1200,7 @@ class VeloxTestSettings extends BackendTestSettings {
enableSuite[GlutenFallbackSuite]
enableSuite[GlutenHiveSQLQuerySuite]
enableSuite[GlutenCollapseProjectExecTransformerSuite]
enableSuite[GlutenExpressionMappingSuite]

override def getSQLQueryTestSettings: SQLQueryTestSettings = VeloxSQLQueryTestSettings
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.catalyst.expressions

import io.glutenproject.GlutenConfig
import io.glutenproject.execution.ProjectExecTransformer
import io.glutenproject.expression.ExpressionMappings

import org.apache.spark.sql.{GlutenSQLTestsTrait, Row}
import org.apache.spark.sql.execution.ProjectExec
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper

/**
 * Checks that names listed in [[GlutenConfig.EXPRESSION_BLACK_LIST]] are removed from
 * [[ExpressionMappings.expressionsMap]], and that a projection using a blacklisted expression
 * is planned as a vanilla [[ProjectExec]] instead of a [[ProjectExecTransformer]].
 */
class GlutenExpressionMappingSuite extends GlutenSQLTestsTrait with AdaptiveSparkPlanHelper {

  test("test expression blacklist") {
    // Baseline: with no blacklist configured both regexp expressions are mapped.
    val defaultNames = ExpressionMappings.expressionsMap.values.toSet
    assert(defaultNames.contains("regexp_replace"))
    assert(defaultNames.contains("regexp_extract"))

    withSQLConf(GlutenConfig.EXPRESSION_BLACK_LIST.key -> "regexp_replace,regexp_extract,add") {
      val filteredNames = ExpressionMappings.expressionsMap.values.toSet
      assert(!filteredNames.contains("regexp_replace"))
      assert(!filteredNames.contains("regexp_extract"))
      // Only exact names are removed; a name that merely shares a prefix must survive.
      assert(filteredNames.contains("regexp_extract_all"))
      assert(!filteredNames.contains("add"))

      // Create the table inside withTable so it is dropped even if creation or a later
      // assertion fails.
      withTable("t") {
        spark.sql("CREATE TABLE t USING PARQUET AS SELECT 1 as c")
        val df = spark.sql("SELECT c + 1 FROM t")
        checkAnswer(df, Row(2))
        // "add" is blacklisted, so the projection must not be offloaded.
        val plan = df.queryExecution.executedPlan
        assert(find(plan)(_.isInstanceOf[ProjectExecTransformer]).isEmpty)
        assert(find(plan)(_.isInstanceOf[ProjectExec]).isDefined)
      }
    }
  }
}
15 changes: 15 additions & 0 deletions shims/common/src/main/scala/io/glutenproject/GlutenConfig.scala
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,15 @@ class GlutenConfig(conf: SQLConf) extends Logging {

def extendedExpressionTransformer: String = conf.getConf(EXTENDED_EXPRESSION_TRAN_CONF)

/**
 * Expression names parsed from [[EXPRESSION_BLACK_LIST]].
 *
 * The configured value is lower-cased and split on commas; every entry is trimmed
 * individually and empty entries are discarded, so values such as "a, b", "a,,b" or a blank
 * string behave as expected.
 *
 * @return the set of blacklisted names, or an empty set when the config is unset or contains
 *         no non-blank entry
 */
def expressionBlacklist: Set[String] = {
  conf
    .getConf(EXPRESSION_BLACK_LIST)
    .map {
      raw =>
        raw
          .toLowerCase(Locale.ROOT)
          .split(",")
          .map(_.trim) // trim per entry; trimming only the whole string leaves " b" in "a, b"
          .filter(_.nonEmpty) // "".split(",") yields Array(""), which must not become an entry
          .toSet
    }
    .getOrElse(Set.empty[String])
}

def printStackOnValidationFailure: Boolean =
conf.getConf(VALIDATION_PRINT_FAILURE_STACK_)

Expand Down Expand Up @@ -1252,6 +1261,12 @@ object GlutenConfig {
.stringConf
.createWithDefaultString("")

// Optional (no default) comma-separated list of expression names to exclude from transform.
val EXPRESSION_BLACK_LIST =
  buildConf("spark.gluten.expression.blacklist")
    .doc("A black list of expressions to skip transform, multiple values separated by commas.")
    .stringConf
    .createOptional

val FALLBACK_REPORTER_ENABLED =
buildConf("spark.gluten.sql.columnar.fallbackReporter")
.doc("When true, enable fallback reporter rule to print fallback reason")
Expand Down

0 comments on commit 08c10c0

Please sign in to comment.