From 2b6390d5be0ad86ef2126cd75441c7bf79f19d75 Mon Sep 17 00:00:00 2001 From: Ankita Victor Date: Fri, 29 Nov 2024 15:46:12 +0530 Subject: [PATCH 1/8] Update try_cast logic --- .../ScalarFunctionsValidateSuite.scala | 25 ++++++++++++ .../gluten/substrait/expression/CastNode.java | 8 ++-- .../expression/ExpressionBuilder.java | 4 +- .../UnaryExpressionTransformer.scala | 5 ++- .../utils/velox/VeloxTestSettings.scala | 13 +++++- .../GlutenCastWithAnsiOnSuite.scala | 40 +++++++++++++++++++ .../expressions/GlutenTryCastSuite.scala | 21 ++++++++++ 7 files changed, 108 insertions(+), 8 deletions(-) create mode 100644 gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastWithAnsiOnSuite.scala create mode 100644 gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenTryCastSuite.scala diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index 46d6870b04c9..bcea2d7230af 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -1457,4 +1457,29 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite { checkGlutenOperatorMatch[FilterExecTransformer](df) } } + + test("Test cast and try_cast") { + withTempView("try_cast_table") { + withTempPath { + path => + Seq[(String)](("123456"), ("000A1234")) + .toDF("str") + .write + .parquet(path.getCanonicalPath) + spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("try_cast_table") + runQueryAndCompare("select try_cast(str as bigint) from try_cast_table") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + runQueryAndCompare("select cast(str as bigint) from try_cast_table") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + runQueryAndCompare("select try_cast(str as double) from try_cast_table") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + runQueryAndCompare("select cast(str as double) from try_cast_table") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + } + } + } } diff --git a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/CastNode.java b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/CastNode.java index 1acbcf6acaa3..3d2def2bd10b 100644 --- a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/CastNode.java +++ b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/CastNode.java @@ -26,12 +26,12 @@ public class CastNode implements ExpressionNode, Serializable { private final TypeNode typeNode; private final ExpressionNode expressionNode; - public final boolean ansiEnabled; + public final boolean throwOnFailure; - CastNode(TypeNode typeNode, ExpressionNode expressionNode, boolean ansiEnabled) { + CastNode(TypeNode typeNode, ExpressionNode expressionNode, boolean throwOnFailure) { this.typeNode = typeNode; this.expressionNode = expressionNode; - this.ansiEnabled = ansiEnabled; + this.throwOnFailure = throwOnFailure; } @Override @@ -39,7 +39,7 @@ public Expression toProtobuf() { Expression.Cast.Builder castBuilder = Expression.Cast.newBuilder(); castBuilder.setType(typeNode.toProtobuf()); castBuilder.setInput(expressionNode.toProtobuf()); - if (ansiEnabled) { + if (throwOnFailure) { // Throw exception on failure. castBuilder.setFailureBehaviorValue(2); } else { diff --git a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/ExpressionBuilder.java b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/ExpressionBuilder.java index 16ae5412ea76..1e6c58f682ca 100644 --- a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/ExpressionBuilder.java +++ b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/ExpressionBuilder.java @@ -263,8 +263,8 @@ public static AggregateFunctionNode makeAggregateFunction( } public static CastNode makeCast( - TypeNode typeNode, ExpressionNode expressionNode, boolean ansiEnabled) { - return new CastNode(typeNode, expressionNode, ansiEnabled); + TypeNode typeNode, ExpressionNode expressionNode, boolean throwOnFailure) { + return new CastNode(typeNode, expressionNode, throwOnFailure); } public static StringMapNode makeStringMap(Map values) { diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala index bcbac60dec0d..50813bc12ddb 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala @@ -43,7 +43,10 @@ case class CastTransformer(substraitExprName: String, child: ExpressionTransform extends UnaryExpressionTransformer { override def doTransform(args: java.lang.Object): ExpressionNode = { val typeNode = ConverterUtils.getTypeNode(dataType, original.nullable) - ExpressionBuilder.makeCast(typeNode, child.doTransform(args), original.ansiEnabled) + ExpressionBuilder.makeCast( + typeNode, + child.doTransform(args), + original.evalMode == EvalMode.ANSI) } } diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index cb2151fe4698..14066d55a4e5 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -20,7 +20,7 @@ import org.apache.gluten.utils.{BackendTestSettings, SQLQueryTestSettings} import org.apache.spark.GlutenSortShuffleSuite import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.expressions.{GlutenArithmeticExpressionSuite, GlutenBitwiseExpressionsSuite, GlutenCastSuite, GlutenCollectionExpressionsSuite, GlutenComplexTypeSuite, GlutenConditionalExpressionSuite, GlutenDateExpressionsSuite, GlutenDecimalExpressionSuite, GlutenDecimalPrecisionSuite, GlutenHashExpressionsSuite, GlutenHigherOrderFunctionsSuite, GlutenIntervalExpressionsSuite, GlutenLiteralExpressionSuite, GlutenMathExpressionsSuite, GlutenMiscExpressionsSuite, GlutenNondeterministicSuite, GlutenNullExpressionsSuite, GlutenPredicateSuite, GlutenRandomSuite, GlutenRegexpExpressionsSuite, GlutenSortOrderExpressionsSuite, GlutenStringExpressionsSuite, GlutenTryEvalSuite} +import org.apache.spark.sql.catalyst.expressions.{GlutenArithmeticExpressionSuite, GlutenBitwiseExpressionsSuite, GlutenCastSuite, GlutenCastWithAnsiOnSuite, GlutenCollectionExpressionsSuite, GlutenComplexTypeSuite, GlutenConditionalExpressionSuite, GlutenDateExpressionsSuite, GlutenDecimalExpressionSuite, GlutenDecimalPrecisionSuite, GlutenHashExpressionsSuite, GlutenHigherOrderFunctionsSuite, GlutenIntervalExpressionsSuite, GlutenLiteralExpressionSuite, GlutenMathExpressionsSuite, GlutenMiscExpressionsSuite, GlutenNondeterministicSuite, GlutenNullExpressionsSuite, GlutenPredicateSuite, GlutenRandomSuite, GlutenRegexpExpressionsSuite, GlutenSortOrderExpressionsSuite, GlutenStringExpressionsSuite, GlutenTryCastSuite, GlutenTryEvalSuite} import org.apache.spark.sql.connector._ import org.apache.spark.sql.errors.{GlutenQueryCompilationErrorsDSv2Suite, GlutenQueryCompilationErrorsSuite, GlutenQueryExecutionErrorsSuite, GlutenQueryParsingErrorsSuite} import org.apache.spark.sql.execution._ @@ -92,6 +92,17 @@ class VeloxTestSettings extends BackendTestSettings { .exclude( "Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly. ) + enableSuite[GlutenCastWithAnsiOnSuite] + enableSuite[GlutenTryCastSuite] + .exclude( + "Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly. + ) + .exclude("ANSI mode: Throw exception on casting out-of-range value to byte type") + .exclude("ANSI mode: Throw exception on casting out-of-range value to short type") + .exclude("ANSI mode: Throw exception on casting out-of-range value to int type") + .exclude("ANSI mode: Throw exception on casting out-of-range value to long type") + .exclude("cast from invalid string to numeric should throw NumberFormatException") + .exclude("SPARK-26218: Fix the corner case of codegen when casting float to Integer") enableSuite[GlutenCollectionExpressionsSuite] // Rewrite in Gluten to replace Seq with Array .exclude("Shuffle") diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastWithAnsiOnSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastWithAnsiOnSuite.scala new file mode 100644 index 000000000000..5a0a07c58653 --- /dev/null +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastWithAnsiOnSuite.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.sql.GlutenTestsTrait +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.DataType + +class GlutenCastWithAnsiOnSuite extends CastWithAnsiOnSuite with GlutenTestsTrait { + override def beforeAll(): Unit = { + super.beforeAll() + SQLConf.get.setConf(SQLConf.ANSI_ENABLED, true) + } + + override def afterAll(): Unit = { + super.afterAll() + SQLConf.get.unsetConf(SQLConf.ANSI_ENABLED) + } + + override def cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): Cast = { + v match { + case lit: Expression => Cast(lit, targetType, timeZoneId) + case _ => Cast(Literal(v), targetType, timeZoneId) + } + } +} diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenTryCastSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenTryCastSuite.scala new file mode 100644 index 000000000000..b0bddf935467 --- /dev/null +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenTryCastSuite.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.sql.GlutenTestsTrait + +class GlutenTryCastSuite extends TryCastSuite with GlutenTestsTrait {} From bd318a4dbe817caa5714b2486e9e7f9ec267e9c8 Mon Sep 17 00:00:00 2001 From: Ankita Victor Date: Mon, 2 Dec 2024 15:36:01 +0530 Subject: [PATCH 2/8] Address comments --- .../ScalarFunctionsValidateSuite.scala | 19 +++++++-- .../UnaryExpressionTransformer.scala | 6 +-- .../utils/velox/VeloxTestSettings.scala | 10 +++++ .../utils/velox/VeloxTestSettings.scala | 1 - .../GlutenCastWithAnsiOnSuite.scala | 40 ------------------- .../apache/gluten/sql/shims/SparkShims.scala | 10 ++--- .../sql/shims/spark32/Spark32Shims.scala | 4 ++ .../sql/shims/spark33/Spark33Shims.scala | 4 ++ .../sql/shims/spark34/Spark34Shims.scala | 4 ++ .../sql/shims/spark35/Spark35Shims.scala | 4 ++ 10 files changed, 49 insertions(+), 53 deletions(-) delete mode 100644 gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastWithAnsiOnSuite.scala diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index bcea2d7230af..78203f1466cd 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -1458,7 +1458,7 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite { } } - test("Test cast and try_cast") { + test("Test try_cast") { withTempView("try_cast_table") { withTempPath { path => @@ -1470,10 +1470,23 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite { runQueryAndCompare("select try_cast(str as bigint) from try_cast_table") { checkGlutenOperatorMatch[ProjectExecTransformer] } - runQueryAndCompare("select cast(str as bigint) from try_cast_table") { + runQueryAndCompare("select try_cast(str as double) from try_cast_table") { checkGlutenOperatorMatch[ProjectExecTransformer] } - runQueryAndCompare("select try_cast(str as double) from try_cast_table") { + } + } + } + + test("Test cast") { + withTempView("cast_table") { + withTempPath { + path => + Seq[(String)](("123456"), ("000A1234")) + .toDF("str") + .write + .parquet(path.getCanonicalPath) + spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("cast_table") + runQueryAndCompare("select cast(str as bigint) from try_cast_table") { checkGlutenOperatorMatch[ProjectExecTransformer] } runQueryAndCompare("select cast(str as double) from try_cast_table") { diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala index 50813bc12ddb..d69ec4af87bf 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala @@ -18,13 +18,11 @@ package org.apache.gluten.expression import org.apache.gluten.backendsapi.BackendsApiManager import org.apache.gluten.exception.GlutenNotSupportException +import org.apache.gluten.sql.shims.SparkShimLoader import org.apache.gluten.substrait.`type`.ListNode import org.apache.gluten.substrait.`type`.MapNode import org.apache.gluten.substrait.expression.{ExpressionBuilder, ExpressionNode, StructLiteralNode} - -import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types._ - import com.google.common.collect.Lists case class ChildTransformer( @@ -46,7 +44,7 @@ case class CastTransformer(substraitExprName: String, child: ExpressionTransform ExpressionBuilder.makeCast( typeNode, child.doTransform(args), - original.evalMode == EvalMode.ANSI) + SparkShimLoader.getSparkShims.ansiEnabled(original)) } } diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index df79b663bcbe..3109f96ce9b7 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -91,6 +91,16 @@ class VeloxTestSettings extends BackendTestSettings { .exclude( "Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly. ) + enableSuite[GlutenTryCastSuite] + .exclude( + "Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly. + ) + .exclude("ANSI mode: Throw exception on casting out-of-range value to byte type") + .exclude("ANSI mode: Throw exception on casting out-of-range value to short type") + .exclude("ANSI mode: Throw exception on casting out-of-range value to int type") + .exclude("ANSI mode: Throw exception on casting out-of-range value to long type") + .exclude("cast from invalid string to numeric should throw NumberFormatException") + .exclude("SPARK-26218: Fix the corner case of codegen when casting float to Integer") enableSuite[GlutenCollectionExpressionsSuite] // Rewrite in Gluten to replace Seq with Array .exclude("Shuffle") diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 14066d55a4e5..d24176ec3ebb 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -92,7 +92,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude( "Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly. ) - enableSuite[GlutenCastWithAnsiOnSuite] enableSuite[GlutenTryCastSuite] .exclude( "Process Infinity, -Infinity, NaN in case insensitive manner" // +inf not supported in folly. diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastWithAnsiOnSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastWithAnsiOnSuite.scala deleted file mode 100644 index 5a0a07c58653..000000000000 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastWithAnsiOnSuite.scala +++ /dev/null @@ -1,40 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql.catalyst.expressions - -import org.apache.spark.sql.GlutenTestsTrait -import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.sql.types.DataType - -class GlutenCastWithAnsiOnSuite extends CastWithAnsiOnSuite with GlutenTestsTrait { - override def beforeAll(): Unit = { - super.beforeAll() - SQLConf.get.setConf(SQLConf.ANSI_ENABLED, true) - } - - override def afterAll(): Unit = { - super.afterAll() - SQLConf.get.unsetConf(SQLConf.ANSI_ENABLED) - } - - override def cast(v: Any, targetType: DataType, timeZoneId: Option[String] = None): Cast = { - v match { - case lit: Expression => Cast(lit, targetType, timeZoneId) - case _ => Cast(Literal(v), targetType, timeZoneId) - } - } -} diff --git a/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala b/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala index fba6a4a5a48a..fe23190ed681 100644 --- a/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala +++ b/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala @@ -17,7 +17,6 @@ package org.apache.gluten.sql.shims import org.apache.gluten.expression.Sig - import org.apache.spark.{SparkContext, TaskContext} import org.apache.spark.broadcast.Broadcast import org.apache.spark.internal.io.FileCommitProtocol @@ -27,7 +26,7 @@ import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.csv.CSVOptions -import org.apache.spark.sql.catalyst.expressions.{Attribute, BinaryExpression, Expression} +import org.apache.spark.sql.catalyst.expressions.{Attribute, BinaryExpression, Cast, Expression} import org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggregate import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -47,12 +46,10 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DecimalType, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.storage.{BlockId, BlockManagerId} - import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.parquet.schema.MessageType -import java.util.{Map => JMap, Properties} - +import java.util.{Properties, Map => JMap} import scala.reflect.ClassTag sealed abstract class ShimDescriptor @@ -285,4 +282,7 @@ trait SparkShims { /** Shim method for usages from GlutenExplainUtils.scala. */ def unsetOperatorId(plan: QueryPlan[_]): Unit + + /** Shim method to determine failure behaviour for Cast */ + def ansiEnabled(original: Cast): Boolean } diff --git a/shims/spark32/src/main/scala/org/apache/gluten/sql/shims/spark32/Spark32Shims.scala b/shims/spark32/src/main/scala/org/apache/gluten/sql/shims/spark32/Spark32Shims.scala index 833d2385b03a..3b4e665d1007 100644 --- a/shims/spark32/src/main/scala/org/apache/gluten/sql/shims/spark32/Spark32Shims.scala +++ b/shims/spark32/src/main/scala/org/apache/gluten/sql/shims/spark32/Spark32Shims.scala @@ -296,4 +296,8 @@ class Spark32Shims extends SparkShims { override def unsetOperatorId(plan: QueryPlan[_]): Unit = { plan.unsetTagValue(QueryPlan.OP_ID_TAG) } + + override def ansiEnabled(original: Cast): Boolean = { + original.ansiEnabled + } } diff --git a/shims/spark33/src/main/scala/org/apache/gluten/sql/shims/spark33/Spark33Shims.scala b/shims/spark33/src/main/scala/org/apache/gluten/sql/shims/spark33/Spark33Shims.scala index 2135780d05fb..382abf0ef6da 100644 --- a/shims/spark33/src/main/scala/org/apache/gluten/sql/shims/spark33/Spark33Shims.scala +++ b/shims/spark33/src/main/scala/org/apache/gluten/sql/shims/spark33/Spark33Shims.scala @@ -377,4 +377,8 @@ class Spark33Shims extends SparkShims { override def unsetOperatorId(plan: QueryPlan[_]): Unit = { plan.unsetTagValue(QueryPlan.OP_ID_TAG) } + + override def ansiEnabled(original: Cast): Boolean = { + original.ansiEnabled + } } diff --git a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala index bedad4c01741..96f3f88aa158 100644 --- a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala +++ b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala @@ -512,4 +512,8 @@ class Spark34Shims extends SparkShims { override def unsetOperatorId(plan: QueryPlan[_]): Unit = { plan.unsetTagValue(QueryPlan.OP_ID_TAG) } + + override def ansiEnabled(original: Cast): Boolean = { + original.evalMode == EvalMode.ANSI + } } diff --git a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala index 43ed51579a1b..65234823d4df 100644 --- a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala +++ b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala @@ -549,4 +549,8 @@ class Spark35Shims extends SparkShims { override def unsetOperatorId(plan: QueryPlan[_]): Unit = { QueryPlan.localIdMap.get().remove(plan) } + + override def ansiEnabled(original: Cast): Boolean = { + original.evalMode == EvalMode.ANSI + } } From e3755a6717f2e363f643534f6e5ab2998b2bb87d Mon Sep 17 00:00:00 2001 From: Ankita Victor Date: Mon, 2 Dec 2024 15:41:15 +0530 Subject: [PATCH 3/8] Reuse func --- .../gluten/expression/UnaryExpressionTransformer.scala | 2 +- .../scala/org/apache/gluten/sql/shims/SparkShims.scala | 10 +++++----- .../apache/gluten/sql/shims/spark32/Spark32Shims.scala | 4 ---- .../apache/gluten/sql/shims/spark33/Spark33Shims.scala | 4 ---- .../apache/gluten/sql/shims/spark34/Spark34Shims.scala | 4 ---- .../apache/gluten/sql/shims/spark35/Spark35Shims.scala | 4 ---- 6 files changed, 6 insertions(+), 22 deletions(-) diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala index d69ec4af87bf..ac2354c38a12 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala @@ -44,7 +44,7 @@ case class CastTransformer(substraitExprName: String, child: ExpressionTransform ExpressionBuilder.makeCast( typeNode, child.doTransform(args), - SparkShimLoader.getSparkShims.ansiEnabled(original)) + SparkShimLoader.getSparkShims.withAnsiEvalMode(original)) } } diff --git a/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala b/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala index fe23190ed681..fba6a4a5a48a 100644 --- a/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala +++ b/shims/common/src/main/scala/org/apache/gluten/sql/shims/SparkShims.scala @@ -17,6 +17,7 @@ package org.apache.gluten.sql.shims import org.apache.gluten.expression.Sig + import org.apache.spark.{SparkContext, TaskContext} import org.apache.spark.broadcast.Broadcast import org.apache.spark.internal.io.FileCommitProtocol @@ -26,7 +27,7 @@ import org.apache.spark.sql.{AnalysisException, SparkSession} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.catalog.BucketSpec import org.apache.spark.sql.catalyst.csv.CSVOptions -import org.apache.spark.sql.catalyst.expressions.{Attribute, BinaryExpression, Cast, Expression} +import org.apache.spark.sql.catalyst.expressions.{Attribute, BinaryExpression, Expression} import org.apache.spark.sql.catalyst.expressions.aggregate.TypedImperativeAggregate import org.apache.spark.sql.catalyst.plans.QueryPlan import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan @@ -46,10 +47,12 @@ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DecimalType, StructType} import org.apache.spark.sql.util.CaseInsensitiveStringMap import org.apache.spark.storage.{BlockId, BlockManagerId} + import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.parquet.schema.MessageType -import java.util.{Properties, Map => JMap} +import java.util.{Map => JMap, Properties} + import scala.reflect.ClassTag sealed abstract class ShimDescriptor @@ -282,7 +285,4 @@ trait SparkShims { /** Shim method for usages from GlutenExplainUtils.scala. */ def unsetOperatorId(plan: QueryPlan[_]): Unit - - /** Shim method to determine failure behaviour for Cast */ - def ansiEnabled(original: Cast): Boolean } diff --git a/shims/spark32/src/main/scala/org/apache/gluten/sql/shims/spark32/Spark32Shims.scala b/shims/spark32/src/main/scala/org/apache/gluten/sql/shims/spark32/Spark32Shims.scala index 3b4e665d1007..833d2385b03a 100644 --- a/shims/spark32/src/main/scala/org/apache/gluten/sql/shims/spark32/Spark32Shims.scala +++ b/shims/spark32/src/main/scala/org/apache/gluten/sql/shims/spark32/Spark32Shims.scala @@ -296,8 +296,4 @@ class Spark32Shims extends SparkShims { override def unsetOperatorId(plan: QueryPlan[_]): Unit = { plan.unsetTagValue(QueryPlan.OP_ID_TAG) } - - override def ansiEnabled(original: Cast): Boolean = { - original.ansiEnabled - } } diff --git a/shims/spark33/src/main/scala/org/apache/gluten/sql/shims/spark33/Spark33Shims.scala b/shims/spark33/src/main/scala/org/apache/gluten/sql/shims/spark33/Spark33Shims.scala index 382abf0ef6da..2135780d05fb 100644 --- a/shims/spark33/src/main/scala/org/apache/gluten/sql/shims/spark33/Spark33Shims.scala +++ b/shims/spark33/src/main/scala/org/apache/gluten/sql/shims/spark33/Spark33Shims.scala @@ -377,8 +377,4 @@ class Spark33Shims extends SparkShims { override def unsetOperatorId(plan: QueryPlan[_]): Unit = { plan.unsetTagValue(QueryPlan.OP_ID_TAG) } - - override def ansiEnabled(original: Cast): Boolean = { - original.ansiEnabled - } } diff --git a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala index 96f3f88aa158..bedad4c01741 100644 --- a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala +++ b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala @@ -512,8 +512,4 @@ class Spark34Shims extends SparkShims { override def unsetOperatorId(plan: QueryPlan[_]): Unit = { plan.unsetTagValue(QueryPlan.OP_ID_TAG) } - - override def ansiEnabled(original: Cast): Boolean = { - original.evalMode == EvalMode.ANSI - } } diff --git a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala index 65234823d4df..43ed51579a1b 100644 --- a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala +++ b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala @@ -549,8 +549,4 @@ class Spark35Shims extends SparkShims { override def unsetOperatorId(plan: QueryPlan[_]): Unit = { QueryPlan.localIdMap.get().remove(plan) } - - override def ansiEnabled(original: Cast): Boolean = { - original.evalMode == EvalMode.ANSI - } } From 51442928123fc63673c0502e8f7eda11ed425f15 Mon Sep 17 00:00:00 2001 From: Ankita Victor Date: Mon, 2 Dec 2024 16:55:27 +0530 Subject: [PATCH 4/8] Fix format --- .../apache/gluten/expression/UnaryExpressionTransformer.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala index ac2354c38a12..c3c5711712a7 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala @@ -22,7 +22,9 @@ import org.apache.gluten.sql.shims.SparkShimLoader import org.apache.gluten.substrait.`type`.ListNode import org.apache.gluten.substrait.`type`.MapNode import org.apache.gluten.substrait.expression.{ExpressionBuilder, ExpressionNode, StructLiteralNode} + import org.apache.spark.sql.types._ + import com.google.common.collect.Lists case class ChildTransformer( From 3fbef08489f1234ded6c6935dc6cbb9a446e0302 Mon Sep 17 00:00:00 2001 From: Ankita Victor Date: Mon, 2 Dec 2024 16:59:36 +0530 Subject: [PATCH 5/8] Fix format --- .../expressions/GlutenTryCastSuite.scala | 21 +++++++++++++++++++ .../utils/velox/VeloxTestSettings.scala | 2 +- 2 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenTryCastSuite.scala diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenTryCastSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenTryCastSuite.scala new file mode 100644 index 000000000000..b0bddf935467 --- /dev/null +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenTryCastSuite.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.catalyst.expressions + +import org.apache.spark.sql.GlutenTestsTrait + +class GlutenTryCastSuite extends TryCastSuite with GlutenTestsTrait {} diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index d24176ec3ebb..8263252667b6 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -20,7 +20,7 @@ import org.apache.gluten.utils.{BackendTestSettings, SQLQueryTestSettings} import org.apache.spark.GlutenSortShuffleSuite import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.expressions.{GlutenArithmeticExpressionSuite, GlutenBitwiseExpressionsSuite, GlutenCastSuite, GlutenCastWithAnsiOnSuite, GlutenCollectionExpressionsSuite, GlutenComplexTypeSuite, GlutenConditionalExpressionSuite, GlutenDateExpressionsSuite, GlutenDecimalExpressionSuite, GlutenDecimalPrecisionSuite, GlutenHashExpressionsSuite, GlutenHigherOrderFunctionsSuite, GlutenIntervalExpressionsSuite, GlutenLiteralExpressionSuite, GlutenMathExpressionsSuite, GlutenMiscExpressionsSuite, GlutenNondeterministicSuite, GlutenNullExpressionsSuite, GlutenPredicateSuite, GlutenRandomSuite, GlutenRegexpExpressionsSuite, GlutenSortOrderExpressionsSuite, GlutenStringExpressionsSuite, GlutenTryCastSuite, GlutenTryEvalSuite} +import org.apache.spark.sql.catalyst.expressions.{GlutenArithmeticExpressionSuite, GlutenBitwiseExpressionsSuite, GlutenCastSuite, GlutenCollectionExpressionsSuite, GlutenComplexTypeSuite, GlutenConditionalExpressionSuite, GlutenDateExpressionsSuite, GlutenDecimalExpressionSuite, GlutenDecimalPrecisionSuite, GlutenHashExpressionsSuite, GlutenHigherOrderFunctionsSuite, GlutenIntervalExpressionsSuite, GlutenLiteralExpressionSuite, GlutenMathExpressionsSuite, GlutenMiscExpressionsSuite, GlutenNondeterministicSuite, GlutenNullExpressionsSuite, GlutenPredicateSuite, GlutenRandomSuite, GlutenRegexpExpressionsSuite, GlutenSortOrderExpressionsSuite, GlutenStringExpressionsSuite, GlutenTryCastSuite, GlutenTryEvalSuite} import org.apache.spark.sql.connector._ import org.apache.spark.sql.errors.{GlutenQueryCompilationErrorsDSv2Suite, GlutenQueryCompilationErrorsSuite, GlutenQueryExecutionErrorsSuite, GlutenQueryParsingErrorsSuite} import org.apache.spark.sql.execution._ From a6df752f0c484c57fd99e1c51e01436753029b25 Mon Sep 17 00:00:00 2001 From: Ankita Victor Date: Mon, 2 Dec 2024 17:00:46 +0530 Subject: [PATCH 6/8] Add to settings --- .../scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 3109f96ce9b7..8682b6245234 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -20,7 +20,7 @@ import org.apache.gluten.utils.{BackendTestSettings, SQLQueryTestSettings} import org.apache.spark.GlutenSortShuffleSuite import org.apache.spark.sql._ -import org.apache.spark.sql.catalyst.expressions.{GlutenArithmeticExpressionSuite, GlutenBitwiseExpressionsSuite, GlutenCastSuite, GlutenCollectionExpressionsSuite, GlutenComplexTypeSuite, GlutenConditionalExpressionSuite, GlutenDateExpressionsSuite, GlutenDecimalExpressionSuite, GlutenDecimalPrecisionSuite, GlutenHashExpressionsSuite, GlutenHigherOrderFunctionsSuite, GlutenIntervalExpressionsSuite, GlutenLiteralExpressionSuite, GlutenMathExpressionsSuite, GlutenMiscExpressionsSuite, GlutenNondeterministicSuite, GlutenNullExpressionsSuite, GlutenPredicateSuite, GlutenRandomSuite, GlutenRegexpExpressionsSuite, GlutenSortOrderExpressionsSuite, GlutenStringExpressionsSuite, GlutenTryEvalSuite} +import org.apache.spark.sql.catalyst.expressions.{GlutenArithmeticExpressionSuite, GlutenBitwiseExpressionsSuite, GlutenCastSuite, GlutenCollectionExpressionsSuite, GlutenComplexTypeSuite, GlutenConditionalExpressionSuite, GlutenDateExpressionsSuite, GlutenDecimalExpressionSuite, GlutenDecimalPrecisionSuite, GlutenHashExpressionsSuite, GlutenHigherOrderFunctionsSuite, GlutenIntervalExpressionsSuite, GlutenLiteralExpressionSuite, GlutenMathExpressionsSuite, GlutenMiscExpressionsSuite, GlutenNondeterministicSuite, GlutenNullExpressionsSuite, GlutenPredicateSuite, GlutenRandomSuite, GlutenRegexpExpressionsSuite, GlutenSortOrderExpressionsSuite, GlutenStringExpressionsSuite, GlutenTryCastSuite, GlutenTryEvalSuite} import org.apache.spark.sql.connector.{GlutenDataSourceV2DataFrameSessionCatalogSuite, GlutenDataSourceV2DataFrameSuite, GlutenDataSourceV2FunctionSuite, GlutenDataSourceV2SQLSessionCatalogSuite, GlutenDataSourceV2SQLSuiteV1Filter, GlutenDataSourceV2SQLSuiteV2Filter, GlutenDataSourceV2Suite, GlutenDeleteFromTableSuite, GlutenDeltaBasedDeleteFromTableSuite, GlutenFileDataSourceV2FallBackSuite, GlutenGroupBasedDeleteFromTableSuite, GlutenKeyGroupedPartitioningSuite, GlutenLocalScanSuite, GlutenMetadataColumnSuite, GlutenSupportsCatalogOptionsSuite, GlutenTableCapabilityCheckSuite, GlutenWriteDistributionAndOrderingSuite} import org.apache.spark.sql.errors.{GlutenQueryCompilationErrorsDSv2Suite, GlutenQueryCompilationErrorsSuite, GlutenQueryExecutionErrorsSuite, GlutenQueryParsingErrorsSuite} import org.apache.spark.sql.execution.{FallbackStrategiesSuite, GlutenBroadcastExchangeSuite, GlutenCoalesceShufflePartitionsSuite, GlutenExchangeSuite, GlutenLocalBroadcastExchangeSuite, GlutenReplaceHashWithSortAggSuite, GlutenReuseExchangeAndSubquerySuite, GlutenSameResultSuite, GlutenSortSuite, GlutenSQLAggregateFunctionSuite, GlutenSQLWindowFunctionSuite, GlutenTakeOrderedAndProjectSuite} From 4d2de46e25909085c48e6ca37bbfbb8acef936e0 Mon Sep 17 00:00:00 2001 From: Ankita Victor Date: Mon, 2 Dec 2024 17:29:58 +0530 Subject: [PATCH 7/8] Add import --- .../apache/gluten/expression/UnaryExpressionTransformer.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala index c3c5711712a7..f9eb1e8eab42 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/expression/UnaryExpressionTransformer.scala @@ -23,6 +23,7 @@ import org.apache.gluten.substrait.`type`.ListNode import org.apache.gluten.substrait.`type`.MapNode import org.apache.gluten.substrait.expression.{ExpressionBuilder, ExpressionNode, StructLiteralNode} +import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.types._ import com.google.common.collect.Lists From ca38e625588647a5a89b933a5d89b9b29d52d772 Mon Sep 17 00:00:00 2001 From: Ankita Victor Date: Mon, 2 Dec 2024 20:10:03 +0530 Subject: [PATCH 8/8] Fix typo --- .../gluten/execution/ScalarFunctionsValidateSuite.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index 78203f1466cd..70e53c8619aa 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -1458,7 +1458,7 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite { } } - test("Test try_cast") { + testWithSpecifiedSparkVersion("Test try_cast", Some("3.4")) { withTempView("try_cast_table") { withTempPath { path => @@ -1486,10 +1486,10 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite { .write .parquet(path.getCanonicalPath) spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("cast_table") - runQueryAndCompare("select cast(str as bigint) from try_cast_table") { + runQueryAndCompare("select cast(str as bigint) from cast_table") { checkGlutenOperatorMatch[ProjectExecTransformer] } - runQueryAndCompare("select cast(str as double) from try_cast_table") { + runQueryAndCompare("select cast(str as double) from cast_table") { checkGlutenOperatorMatch[ProjectExecTransformer] } }