diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala index e13ebd971ef5..711c114590f3 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala @@ -22,7 +22,7 @@ import org.apache.gluten.datasource.ArrowConvertorRule import org.apache.gluten.exception.GlutenNotSupportException import org.apache.gluten.execution._ import org.apache.gluten.expression._ -import org.apache.gluten.expression.ExpressionNames.{TRANSFORM_KEYS, TRANSFORM_VALUES} +import org.apache.gluten.expression.ExpressionNames.{RAISE_ERROR, TRANSFORM_KEYS, TRANSFORM_VALUES} import org.apache.gluten.expression.aggregate.{HLLAdapter, VeloxBloomFilterAggregate, VeloxCollectList, VeloxCollectSet} import org.apache.gluten.extension._ import org.apache.gluten.extension.columnar.FallbackTags @@ -837,6 +837,7 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi { Sig[VeloxBloomFilterAggregate](ExpressionNames.BLOOM_FILTER_AGG), Sig[TransformKeys](TRANSFORM_KEYS), Sig[TransformValues](TRANSFORM_VALUES), + Sig[RaiseError](RAISE_ERROR), // For test purpose. Sig[VeloxDummyExpression](VeloxDummyExpression.VELOX_DUMMY_EXPRESSION) ) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index 1b89855c9ab7..ebbe664feec3 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -673,6 +673,7 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest { sql("""SELECT assert_true(l_orderkey >= 100), l_orderkey from | lineitem limit 100""".stripMargin).collect() } + assert(e.getCause.isInstanceOf[RuntimeException]) assert(e.getMessage.contains("l_orderkey")) } diff --git a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala index f3f524110e4b..51e78a97e997 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala @@ -284,7 +284,6 @@ object ExpressionMappings { Sig[SparkPartitionID](SPARK_PARTITION_ID), Sig[WidthBucket](WIDTH_BUCKET), Sig[ReplicateRows](REPLICATE_ROWS), - Sig[RaiseError](RAISE_ERROR), // Decimal Sig[UnscaledValue](UNSCALED_VALUE), // Generator function diff --git a/gluten-ut/spark32/src/test/resources/sql-tests/inputs/misc-functions.sql b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/misc-functions.sql new file mode 100644 index 000000000000..907ff33000d8 --- /dev/null +++ b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/misc-functions.sql @@ -0,0 +1,22 @@ +-- test for misc functions + +-- typeof +select typeof(null); +select typeof(true); +select typeof(1Y), typeof(1S), typeof(1), typeof(1L); +select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2); +select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days'); +select typeof(x'ABCD'), typeof('SPARK'); +select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')); + +-- Spark-32793: Rewrite AssertTrue with RaiseError +SELECT assert_true(true), assert_true(boolean(1)); +SELECT assert_true(false); +SELECT assert_true(boolean(0)); +SELECT assert_true(null); +SELECT assert_true(boolean(null)); +SELECT assert_true(false, 'custom error message'); + +CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v); +SELECT raise_error('error message'); +SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc; diff --git a/gluten-ut/spark32/src/test/resources/sql-tests/results/misc-functions.sql.out b/gluten-ut/spark32/src/test/resources/sql-tests/results/misc-functions.sql.out new file mode 100644 index 000000000000..6985233c3318 --- /dev/null +++ b/gluten-ut/spark32/src/test/resources/sql-tests/results/misc-functions.sql.out @@ -0,0 +1,137 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 16 + + +-- !query +select typeof(null) +-- !query schema +struct +-- !query output +void + + +-- !query +select typeof(true) +-- !query schema +struct +-- !query output +boolean + + +-- !query +select typeof(1Y), typeof(1S), typeof(1), typeof(1L) +-- !query schema +struct +-- !query output +tinyint smallint int bigint + + +-- !query +select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2) +-- !query schema +struct +-- !query output +float double decimal(2,1) + + +-- !query +select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days') +-- !query schema +struct +-- !query output +date timestamp interval day + + +-- !query +select typeof(x'ABCD'), typeof('SPARK') +-- !query schema +struct +-- !query output +binary string + + +-- !query +select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')) +-- !query schema +struct +-- !query output +array map struct + + +-- !query +SELECT assert_true(true), assert_true(boolean(1)) +-- !query schema +struct +-- !query output +NULL NULL + + +-- !query +SELECT assert_true(false) +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +'false' is not true! + + +-- !query +SELECT assert_true(boolean(0)) +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +'cast(0 as boolean)' is not true! + + +-- !query +SELECT assert_true(null) +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +'null' is not true! + + +-- !query +SELECT assert_true(boolean(null)) +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +'cast(null as boolean)' is not true! + + +-- !query +SELECT assert_true(false, 'custom error message') +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +custom error message + + +-- !query +CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT raise_error('error message') +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +error message + + +-- !query +SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +too big: 8 diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala index 4b75ce13c067..4fbd89bda05c 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala @@ -17,9 +17,10 @@ package org.apache.spark.sql import org.apache.gluten.GlutenConfig +import org.apache.gluten.exception.GlutenException import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils, SystemParameters} -import org.apache.spark.SparkConf +import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -39,6 +40,7 @@ import java.util.Locale import scala.collection.mutable.ArrayBuffer import scala.sys.process.{Process, ProcessLogger} import scala.util.Try +import scala.util.control.NonFatal /** * End-to-end test cases for SQL queries. @@ -761,4 +763,45 @@ class GlutenSQLQueryTestSuite super.afterAll() } } + + /** + * This method handles exceptions occurred during query execution as they may need special care to + * become comparable to the expected output. + * + * @param result + * a function that returns a pair of schema and output + */ + override protected def handleExceptions( + result: => (String, Seq[String])): (String, Seq[String]) = { + try { + result + } catch { + case a: AnalysisException => + // Do not output the logical plan tree which contains expression IDs. + // Also implement a crude way of masking expression IDs in the error message + // with a generic pattern "###". + val msg = if (a.plan.nonEmpty) a.getSimpleMessage else a.getMessage + (emptySchema, Seq(a.getClass.getName, msg.replaceAll("#\\d+", "#x"))) + case s: SparkException if s.getCause != null => + // For a runtime exception, it is hard to match because its message contains + // information of stage, task ID, etc. + // To make result matching simpler, here we match the cause of the exception if it exists. + s.getCause match { + case e: GlutenException => + val reasonPattern = "Reason: (.*)".r + val reason = reasonPattern.findFirstMatchIn(e.getMessage).map(_.group(1)) + + reason match { + case Some(r) => + (emptySchema, Seq(e.getClass.getName, r)) + case None => (emptySchema, Seq()) + } + case cause => + (emptySchema, Seq(cause.getClass.getName, cause.getMessage)) + } + case NonFatal(e) => + // If there is an exception, put the exception class followed by the message. + (emptySchema, Seq(e.getClass.getName, e.getMessage)) + } + } } diff --git a/gluten-ut/spark33/src/test/resources/sql-tests/inputs/misc-functions.sql b/gluten-ut/spark33/src/test/resources/sql-tests/inputs/misc-functions.sql new file mode 100644 index 000000000000..907ff33000d8 --- /dev/null +++ b/gluten-ut/spark33/src/test/resources/sql-tests/inputs/misc-functions.sql @@ -0,0 +1,22 @@ +-- test for misc functions + +-- typeof +select typeof(null); +select typeof(true); +select typeof(1Y), typeof(1S), typeof(1), typeof(1L); +select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2); +select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days'); +select typeof(x'ABCD'), typeof('SPARK'); +select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')); + +-- Spark-32793: Rewrite AssertTrue with RaiseError +SELECT assert_true(true), assert_true(boolean(1)); +SELECT assert_true(false); +SELECT assert_true(boolean(0)); +SELECT assert_true(null); +SELECT assert_true(boolean(null)); +SELECT assert_true(false, 'custom error message'); + +CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v); +SELECT raise_error('error message'); +SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc; diff --git a/gluten-ut/spark33/src/test/resources/sql-tests/results/misc-functions.sql.out b/gluten-ut/spark33/src/test/resources/sql-tests/results/misc-functions.sql.out new file mode 100644 index 000000000000..6985233c3318 --- /dev/null +++ b/gluten-ut/spark33/src/test/resources/sql-tests/results/misc-functions.sql.out @@ -0,0 +1,137 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 16 + + +-- !query +select typeof(null) +-- !query schema +struct +-- !query output +void + + +-- !query +select typeof(true) +-- !query schema +struct +-- !query output +boolean + + +-- !query +select typeof(1Y), typeof(1S), typeof(1), typeof(1L) +-- !query schema +struct +-- !query output +tinyint smallint int bigint + + +-- !query +select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2) +-- !query schema +struct +-- !query output +float double decimal(2,1) + + +-- !query +select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days') +-- !query schema +struct +-- !query output +date timestamp interval day + + +-- !query +select typeof(x'ABCD'), typeof('SPARK') +-- !query schema +struct +-- !query output +binary string + + +-- !query +select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')) +-- !query schema +struct +-- !query output +array map struct + + +-- !query +SELECT assert_true(true), assert_true(boolean(1)) +-- !query schema +struct +-- !query output +NULL NULL + + +-- !query +SELECT assert_true(false) +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +'false' is not true! + + +-- !query +SELECT assert_true(boolean(0)) +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +'cast(0 as boolean)' is not true! + + +-- !query +SELECT assert_true(null) +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +'null' is not true! + + +-- !query +SELECT assert_true(boolean(null)) +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +'cast(null as boolean)' is not true! + + +-- !query +SELECT assert_true(false, 'custom error message') +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +custom error message + + +-- !query +CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT raise_error('error message') +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +error message + + +-- !query +SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +too big: 8 diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala index 4536aa54057c..6e2a9efa87c0 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala @@ -17,9 +17,10 @@ package org.apache.spark.sql import org.apache.gluten.GlutenConfig +import org.apache.gluten.exception.GlutenException import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils, SystemParameters} -import org.apache.spark.SparkConf +import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -39,6 +40,7 @@ import java.util.Locale import scala.collection.mutable.ArrayBuffer import scala.sys.process.{Process, ProcessLogger} import scala.util.Try +import scala.util.control.NonFatal /** * End-to-end test cases for SQL queries. @@ -761,4 +763,45 @@ class GlutenSQLQueryTestSuite super.afterAll() } } + + /** + * This method handles exceptions occurred during query execution as they may need special care to + * become comparable to the expected output. + * + * @param result + * a function that returns a pair of schema and output + */ + override protected def handleExceptions( + result: => (String, Seq[String])): (String, Seq[String]) = { + try { + result + } catch { + case a: AnalysisException => + // Do not output the logical plan tree which contains expression IDs. + // Also implement a crude way of masking expression IDs in the error message + // with a generic pattern "###". + val msg = if (a.plan.nonEmpty) a.getSimpleMessage else a.getMessage + (emptySchema, Seq(a.getClass.getName, msg.replaceAll("#\\d+", "#x"))) + case s: SparkException if s.getCause != null => + // For a runtime exception, it is hard to match because its message contains + // information of stage, task ID, etc. + // To make result matching simpler, here we match the cause of the exception if it exists. + val cause = s.getCause + cause match { + case e: GlutenException => + val reasonPattern = "Reason: (.*)".r + val reason = reasonPattern.findFirstMatchIn(e.getMessage).map(_.group(1)) + + reason match { + case Some(r) => + (emptySchema, Seq(e.getClass.getName, r)) + case None => (emptySchema, Seq()) + } + case _ => (emptySchema, Seq(cause.getClass.getName, cause.getMessage)) + } + case NonFatal(e) => + // If there is an exception, put the exception class followed by the message. + (emptySchema, Seq(e.getClass.getName, e.getMessage)) + } + } } diff --git a/gluten-ut/spark34/src/test/resources/sql-tests/inputs/misc-functions.sql b/gluten-ut/spark34/src/test/resources/sql-tests/inputs/misc-functions.sql new file mode 100644 index 000000000000..907ff33000d8 --- /dev/null +++ b/gluten-ut/spark34/src/test/resources/sql-tests/inputs/misc-functions.sql @@ -0,0 +1,22 @@ +-- test for misc functions + +-- typeof +select typeof(null); +select typeof(true); +select typeof(1Y), typeof(1S), typeof(1), typeof(1L); +select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2); +select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days'); +select typeof(x'ABCD'), typeof('SPARK'); +select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')); + +-- Spark-32793: Rewrite AssertTrue with RaiseError +SELECT assert_true(true), assert_true(boolean(1)); +SELECT assert_true(false); +SELECT assert_true(boolean(0)); +SELECT assert_true(null); +SELECT assert_true(boolean(null)); +SELECT assert_true(false, 'custom error message'); + +CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v); +SELECT raise_error('error message'); +SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc; diff --git a/gluten-ut/spark34/src/test/resources/sql-tests/results/misc-functions.sql.out b/gluten-ut/spark34/src/test/resources/sql-tests/results/misc-functions.sql.out new file mode 100644 index 000000000000..d6d1289a5adb --- /dev/null +++ b/gluten-ut/spark34/src/test/resources/sql-tests/results/misc-functions.sql.out @@ -0,0 +1,134 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +select typeof(null) +-- !query schema +struct +-- !query output +void + + +-- !query +select typeof(true) +-- !query schema +struct +-- !query output +boolean + + +-- !query +select typeof(1Y), typeof(1S), typeof(1), typeof(1L) +-- !query schema +struct +-- !query output +tinyint smallint int bigint + + +-- !query +select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2) +-- !query schema +struct +-- !query output +float double decimal(2,1) + + +-- !query +select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days') +-- !query schema +struct +-- !query output +date timestamp interval day + + +-- !query +select typeof(x'ABCD'), typeof('SPARK') +-- !query schema +struct +-- !query output +binary string + + +-- !query +select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')) +-- !query schema +struct +-- !query output +array map struct + + +-- !query +SELECT assert_true(true), assert_true(boolean(1)) +-- !query schema +struct +-- !query output +NULL NULL + + +-- !query +SELECT assert_true(false) +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +'false' is not true! + + +-- !query +SELECT assert_true(boolean(0)) +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +'cast(0 as boolean)' is not true! + + +-- !query +SELECT assert_true(null) +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +'null' is not true! + + +-- !query +SELECT assert_true(boolean(null)) +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +'cast(null as boolean)' is not true! + + +-- !query +SELECT assert_true(false, 'custom error message') +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +custom error message + + +-- !query +CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT raise_error('error message') +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +error message + + +-- !query +SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +too big: 8 diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala index 0ea1f13ec2ef..8a291990ea31 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala @@ -17,9 +17,12 @@ package org.apache.spark.sql import org.apache.gluten.GlutenConfig +import org.apache.gluten.exception.GlutenException import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils, SystemParameters} -import org.apache.spark.SparkConf +import org.apache.spark.{SparkConf, SparkException, SparkThrowable} +import org.apache.spark.ErrorMessageFormat.MINIMAL +import org.apache.spark.SparkThrowableHelper.getMessage import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -40,6 +43,7 @@ import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.sys.process.{Process, ProcessLogger} import scala.util.Try +import scala.util.control.NonFatal /** * End-to-end test cases for SQL queries. @@ -781,4 +785,50 @@ class GlutenSQLQueryTestSuite super.afterAll() } } + + /** + * This method handles exceptions occurred during query execution as they may need special care to + * become comparable to the expected output. + * + * @param result + * a function that returns a pair of schema and output + */ + override protected def handleExceptions( + result: => (String, Seq[String])): (String, Seq[String]) = { + val format = MINIMAL + try { + result + } catch { + case e: SparkThrowable with Throwable if e.getErrorClass != null => + (emptySchema, Seq(e.getClass.getName, getMessage(e, format))) + case a: AnalysisException => + // Do not output the logical plan tree which contains expression IDs. + // Also implement a crude way of masking expression IDs in the error message + // with a generic pattern "###". + (emptySchema, Seq(a.getClass.getName, a.getSimpleMessage.replaceAll("#\\d+", "#x"))) + case s: SparkException if s.getCause != null => + // For a runtime exception, it is hard to match because its message contains + // information of stage, task ID, etc. + // To make result matching simpler, here we match the cause of the exception if it exists. + s.getCause match { + case e: SparkThrowable with Throwable if e.getErrorClass != null => + (emptySchema, Seq(e.getClass.getName, getMessage(e, format))) + case e: GlutenException => + val reasonPattern = "Reason: (.*)".r + val reason = reasonPattern.findFirstMatchIn(e.getMessage).map(_.group(1)) + + reason match { + case Some(r) => + (emptySchema, Seq(e.getClass.getName, r)) + case None => (emptySchema, Seq()) + } + + case cause => + (emptySchema, Seq(cause.getClass.getName, cause.getMessage)) + } + case NonFatal(e) => + // If there is an exception, put the exception class followed by the message. + (emptySchema, Seq(e.getClass.getName, e.getMessage)) + } + } } diff --git a/gluten-ut/spark35/src/test/resources/sql-tests/inputs/misc-functions.sql b/gluten-ut/spark35/src/test/resources/sql-tests/inputs/misc-functions.sql new file mode 100644 index 000000000000..907ff33000d8 --- /dev/null +++ b/gluten-ut/spark35/src/test/resources/sql-tests/inputs/misc-functions.sql @@ -0,0 +1,22 @@ +-- test for misc functions + +-- typeof +select typeof(null); +select typeof(true); +select typeof(1Y), typeof(1S), typeof(1), typeof(1L); +select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2); +select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days'); +select typeof(x'ABCD'), typeof('SPARK'); +select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')); + +-- Spark-32793: Rewrite AssertTrue with RaiseError +SELECT assert_true(true), assert_true(boolean(1)); +SELECT assert_true(false); +SELECT assert_true(boolean(0)); +SELECT assert_true(null); +SELECT assert_true(boolean(null)); +SELECT assert_true(false, 'custom error message'); + +CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v); +SELECT raise_error('error message'); +SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc; diff --git a/gluten-ut/spark35/src/test/resources/sql-tests/results/misc-functions.sql.out b/gluten-ut/spark35/src/test/resources/sql-tests/results/misc-functions.sql.out new file mode 100644 index 000000000000..d6d1289a5adb --- /dev/null +++ b/gluten-ut/spark35/src/test/resources/sql-tests/results/misc-functions.sql.out @@ -0,0 +1,134 @@ +-- Automatically generated by SQLQueryTestSuite +-- !query +select typeof(null) +-- !query schema +struct +-- !query output +void + + +-- !query +select typeof(true) +-- !query schema +struct +-- !query output +boolean + + +-- !query +select typeof(1Y), typeof(1S), typeof(1), typeof(1L) +-- !query schema +struct +-- !query output +tinyint smallint int bigint + + +-- !query +select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2) +-- !query schema +struct +-- !query output +float double decimal(2,1) + + +-- !query +select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days') +-- !query schema +struct +-- !query output +date timestamp interval day + + +-- !query +select typeof(x'ABCD'), typeof('SPARK') +-- !query schema +struct +-- !query output +binary string + + +-- !query +select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')) +-- !query schema +struct +-- !query output +array map struct + + +-- !query +SELECT assert_true(true), assert_true(boolean(1)) +-- !query schema +struct +-- !query output +NULL NULL + + +-- !query +SELECT assert_true(false) +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +'false' is not true! + + +-- !query +SELECT assert_true(boolean(0)) +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +'cast(0 as boolean)' is not true! + + +-- !query +SELECT assert_true(null) +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +'null' is not true! + + +-- !query +SELECT assert_true(boolean(null)) +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +'cast(null as boolean)' is not true! + + +-- !query +SELECT assert_true(false, 'custom error message') +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +custom error message + + +-- !query +CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v) +-- !query schema +struct<> +-- !query output + + + +-- !query +SELECT raise_error('error message') +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +error message + + +-- !query +SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc +-- !query schema +struct<> +-- !query output +org.apache.gluten.exception.GlutenException +too big: 8 diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala index b1f3945bf192..8a6f5f32f891 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala @@ -17,9 +17,12 @@ package org.apache.spark.sql import org.apache.gluten.GlutenConfig +import org.apache.gluten.exception.GlutenException import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils, SystemParameters} -import org.apache.spark.SparkConf +import org.apache.spark.{SparkConf, SparkException, SparkThrowable} +import org.apache.spark.ErrorMessageFormat.MINIMAL +import org.apache.spark.SparkThrowableHelper.getMessage import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -40,6 +43,7 @@ import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.sys.process.{Process, ProcessLogger} import scala.util.Try +import scala.util.control.NonFatal /** * End-to-end test cases for SQL queries. @@ -783,4 +787,50 @@ class GlutenSQLQueryTestSuite super.afterAll() } } + + /** + * This method handles exceptions occurred during query execution as they may need special care to + * become comparable to the expected output. + * + * @param result + * a function that returns a pair of schema and output + */ + override protected def handleExceptions( + result: => (String, Seq[String])): (String, Seq[String]) = { + val format = MINIMAL + try { + result + } catch { + case e: SparkThrowable with Throwable if e.getErrorClass != null => + (emptySchema, Seq(e.getClass.getName, getMessage(e, format))) + case a: AnalysisException => + // Do not output the logical plan tree which contains expression IDs. + // Also implement a crude way of masking expression IDs in the error message + // with a generic pattern "###". + (emptySchema, Seq(a.getClass.getName, a.getSimpleMessage.replaceAll("#\\d+", "#x"))) + case s: SparkException if s.getCause != null => + // For a runtime exception, it is hard to match because its message contains + // information of stage, task ID, etc. + // To make result matching simpler, here we match the cause of the exception if it exists. + s.getCause match { + case e: SparkThrowable with Throwable if e.getErrorClass != null => + (emptySchema, Seq(e.getClass.getName, getMessage(e, format))) + case e: GlutenException => + val reasonPattern = "Reason: (.*)".r + val reason = reasonPattern.findFirstMatchIn(e.getMessage).map(_.group(1)) + + reason match { + case Some(r) => + (emptySchema, Seq(e.getClass.getName, r)) + case None => (emptySchema, Seq()) + } + + case cause => + (emptySchema, Seq(cause.getClass.getName, cause.getMessage)) + } + case NonFatal(e) => + // If there is an exception, put the exception class followed by the message. + (emptySchema, Seq(e.getClass.getName, e.getMessage)) + } + } }