From 1382f4c835ee1aaae066d266c733816c2f9f190a Mon Sep 17 00:00:00 2001 From: yan ma Date: Mon, 15 Jul 2024 18:03:29 +0800 Subject: [PATCH] Revert "[VL] Support Spark assert_true function (#6329)" This reverts commit 6f189c7cbae1a7d6cf80ff7f0a96afb5299a804f. --- .../gluten/utils/CHExpressionUtil.scala | 5 +- .../velox/VeloxSparkPlanExecApi.scala | 3 + .../ScalarFunctionsValidateSuite.scala | 14 -- .../expression/ExpressionMappings.scala | 3 - .../sql-tests/inputs/misc-functions.sql | 22 --- .../sql-tests/results/misc-functions.sql.out | 137 ------------------ .../utils/velox/VeloxTestSettings.scala | 5 - .../sql/GlutenColumnExpressionSuite.scala | 51 +------ .../spark/sql/GlutenSQLQueryTestSuite.scala | 45 +----- .../sql-tests/inputs/misc-functions.sql | 22 --- .../sql-tests/results/misc-functions.sql.out | 137 ------------------ .../utils/velox/VeloxTestSettings.scala | 5 - .../sql/GlutenColumnExpressionSuite.scala | 51 +------ .../spark/sql/GlutenSQLQueryTestSuite.scala | 45 +----- .../sql-tests/inputs/misc-functions.sql | 22 --- .../sql-tests/results/misc-functions.sql.out | 134 ----------------- .../utils/velox/VeloxTestSettings.scala | 5 - .../sql/GlutenColumnExpressionSuite.scala | 51 +------ .../spark/sql/GlutenSQLQueryTestSuite.scala | 52 +------ .../sql-tests/inputs/misc-functions.sql | 22 --- .../sql-tests/results/misc-functions.sql.out | 134 ----------------- .../utils/velox/VeloxTestSettings.scala | 5 - .../sql/GlutenColumnExpressionSuite.scala | 51 +------ .../spark/sql/GlutenSQLQueryTestSuite.scala | 52 +------ .../gluten/expression/ExpressionNames.scala | 1 - 25 files changed, 12 insertions(+), 1062 deletions(-) delete mode 100644 gluten-ut/spark32/src/test/resources/sql-tests/inputs/misc-functions.sql delete mode 100644 gluten-ut/spark32/src/test/resources/sql-tests/results/misc-functions.sql.out delete mode 100644 gluten-ut/spark33/src/test/resources/sql-tests/inputs/misc-functions.sql delete mode 100644 gluten-ut/spark33/src/test/resources/sql-tests/results/misc-functions.sql.out delete mode 100644 gluten-ut/spark34/src/test/resources/sql-tests/inputs/misc-functions.sql delete mode 100644 gluten-ut/spark34/src/test/resources/sql-tests/results/misc-functions.sql.out delete mode 100644 gluten-ut/spark35/src/test/resources/sql-tests/inputs/misc-functions.sql delete mode 100644 gluten-ut/spark35/src/test/resources/sql-tests/results/misc-functions.sql.out diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala index d65de1cea151d..ac03a7a5b0b90 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala @@ -215,9 +215,6 @@ object CHExpressionUtil { UNIX_MICROS -> DefaultValidator(), TIMESTAMP_MILLIS -> DefaultValidator(), TIMESTAMP_MICROS -> DefaultValidator(), - STACK -> DefaultValidator(), - TRANSFORM_KEYS -> DefaultValidator(), - TRANSFORM_VALUES -> DefaultValidator(), - RAISE_ERROR -> DefaultValidator() + STACK -> DefaultValidator() ) } diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala index 2b9d0173846a8..e13ebd971ef55 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala @@ -22,6 +22,7 @@ import org.apache.gluten.datasource.ArrowConvertorRule import org.apache.gluten.exception.GlutenNotSupportException import org.apache.gluten.execution._ import org.apache.gluten.expression._ +import org.apache.gluten.expression.ExpressionNames.{TRANSFORM_KEYS, TRANSFORM_VALUES} import org.apache.gluten.expression.aggregate.{HLLAdapter, VeloxBloomFilterAggregate, VeloxCollectList, VeloxCollectSet} import org.apache.gluten.extension._ import org.apache.gluten.extension.columnar.FallbackTags @@ -834,6 +835,8 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi { Sig[VeloxCollectSet](ExpressionNames.COLLECT_SET), Sig[VeloxBloomFilterMightContain](ExpressionNames.MIGHT_CONTAIN), Sig[VeloxBloomFilterAggregate](ExpressionNames.BLOOM_FILTER_AGG), + Sig[TransformKeys](TRANSFORM_KEYS), + Sig[TransformValues](TRANSFORM_VALUES), // For test purpose. Sig[VeloxDummyExpression](VeloxDummyExpression.VELOX_DUMMY_EXPRESSION) ) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index fc3bf320ec663..e81c956fe11ba 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -16,7 +16,6 @@ */ package org.apache.gluten.execution -import org.apache.spark.SparkException import org.apache.spark.sql.execution.ProjectExec import org.apache.spark.sql.types._ @@ -664,19 +663,6 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest { } } - test("Test raise_error, assert_true function") { - runQueryAndCompare("""SELECT assert_true(l_orderkey >= 1), l_orderkey - | from lineitem limit 100""".stripMargin) { - checkGlutenOperatorMatch[ProjectExecTransformer] - } - val e = intercept[SparkException] { - sql("""SELECT assert_true(l_orderkey >= 100), l_orderkey from - | lineitem limit 100""".stripMargin).collect() - } - assert(e.getCause.isInstanceOf[RuntimeException]) - assert(e.getMessage.contains("l_orderkey")) - } - test("Test E function") { runQueryAndCompare("""SELECT E() from lineitem limit 100""".stripMargin) { checkGlutenOperatorMatch[ProjectExecTransformer] diff --git a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala index 77e85b3548665..51e78a97e9979 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala @@ -261,8 +261,6 @@ object ExpressionMappings { Sig[MapEntries](MAP_ENTRIES), Sig[MapZipWith](MAP_ZIP_WITH), Sig[StringToMap](STR_TO_MAP), - Sig[TransformKeys](TRANSFORM_KEYS), - Sig[TransformValues](TRANSFORM_VALUES), // Struct functions Sig[GetStructField](GET_STRUCT_FIELD), Sig[CreateNamedStruct](NAMED_STRUCT), @@ -286,7 +284,6 @@ object ExpressionMappings { Sig[SparkPartitionID](SPARK_PARTITION_ID), Sig[WidthBucket](WIDTH_BUCKET), Sig[ReplicateRows](REPLICATE_ROWS), - Sig[RaiseError](RAISE_ERROR), // Decimal Sig[UnscaledValue](UNSCALED_VALUE), // Generator function diff --git a/gluten-ut/spark32/src/test/resources/sql-tests/inputs/misc-functions.sql b/gluten-ut/spark32/src/test/resources/sql-tests/inputs/misc-functions.sql deleted file mode 100644 index 907ff33000d8e..0000000000000 --- a/gluten-ut/spark32/src/test/resources/sql-tests/inputs/misc-functions.sql +++ /dev/null @@ -1,22 +0,0 @@ --- test for misc functions - --- typeof -select typeof(null); -select typeof(true); -select typeof(1Y), typeof(1S), typeof(1), typeof(1L); -select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2); -select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days'); -select typeof(x'ABCD'), typeof('SPARK'); -select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')); - --- Spark-32793: Rewrite AssertTrue with RaiseError -SELECT assert_true(true), assert_true(boolean(1)); -SELECT assert_true(false); -SELECT assert_true(boolean(0)); -SELECT assert_true(null); -SELECT assert_true(boolean(null)); -SELECT assert_true(false, 'custom error message'); - -CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v); -SELECT raise_error('error message'); -SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc; diff --git a/gluten-ut/spark32/src/test/resources/sql-tests/results/misc-functions.sql.out b/gluten-ut/spark32/src/test/resources/sql-tests/results/misc-functions.sql.out deleted file mode 100644 index 6985233c33187..0000000000000 --- a/gluten-ut/spark32/src/test/resources/sql-tests/results/misc-functions.sql.out +++ /dev/null @@ -1,137 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- Number of queries: 16 - - --- !query -select typeof(null) --- !query schema -struct --- !query output -void - - --- !query -select typeof(true) --- !query schema -struct --- !query output -boolean - - --- !query -select typeof(1Y), typeof(1S), typeof(1), typeof(1L) --- !query schema -struct --- !query output -tinyint smallint int bigint - - --- !query -select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2) --- !query schema -struct --- !query output -float double decimal(2,1) - - --- !query -select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days') --- !query schema -struct --- !query output -date timestamp interval day - - --- !query -select typeof(x'ABCD'), typeof('SPARK') --- !query schema -struct --- !query output -binary string - - --- !query -select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')) --- !query schema -struct --- !query output -array map struct - - --- !query -SELECT assert_true(true), assert_true(boolean(1)) --- !query schema -struct --- !query output -NULL NULL - - --- !query -SELECT assert_true(false) --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -'false' is not true! - - --- !query -SELECT assert_true(boolean(0)) --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -'cast(0 as boolean)' is not true! - - --- !query -SELECT assert_true(null) --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -'null' is not true! - - --- !query -SELECT assert_true(boolean(null)) --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -'cast(null as boolean)' is not true! - - --- !query -SELECT assert_true(false, 'custom error message') --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -custom error message - - --- !query -CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v) --- !query schema -struct<> --- !query output - - - --- !query -SELECT raise_error('error message') --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -error message - - --- !query -SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -too big: 8 diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index d5e8df63869ef..a17f72de31214 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -296,11 +296,6 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("sliding range between with aggregation") .exclude("store and retrieve column stats in different time zones") enableSuite[GlutenColumnExpressionSuite] - // Velox raise_error('errMsg') throws a velox_user_error exception with the message 'errMsg'. - // The final caught Spark exception's getCause().getMessage() contains 'errMsg' but does not - // equal 'errMsg' exactly. The following two tests will be skipped and overridden in Gluten. - .exclude("raise_error") - .exclude("assert_true") enableSuite[GlutenDataFrameImplicitsSuite] enableSuite[GlutenGeneratorFunctionSuite] enableSuite[GlutenDataFrameTimeWindowingSuite] diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala index 437cef29215ce..da22e60f932d6 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala @@ -16,60 +16,11 @@ */ package org.apache.spark.sql -import org.apache.spark.SparkException import org.apache.spark.sql.execution.ProjectExec -import org.apache.spark.sql.functions.{assert_true, expr, input_file_name, lit, raise_error} +import org.apache.spark.sql.functions.{expr, input_file_name} class GlutenColumnExpressionSuite extends ColumnExpressionSuite with GlutenSQLTestsTrait { import testImplicits._ - testGluten("raise_error") { - val strDf = Seq(("hello")).toDF("a") - - val e1 = intercept[SparkException] { - strDf.select(raise_error(lit(null.asInstanceOf[String]))).collect() - } - assert(e1.getCause.isInstanceOf[RuntimeException]) - - val e2 = intercept[SparkException] { - strDf.select(raise_error($"a")).collect() - } - assert(e2.getCause.isInstanceOf[RuntimeException]) - assert(e2.getCause.getMessage contains "hello") - } - - testGluten("assert_true") { - // assert_true(condition, errMsgCol) - val booleanDf = Seq((true), (false)).toDF("cond") - checkAnswer( - booleanDf.filter("cond = true").select(assert_true($"cond")), - Row(null) :: Nil - ) - val e1 = intercept[SparkException] { - booleanDf.select(assert_true($"cond", lit(null.asInstanceOf[String]))).collect() - } - assert(e1.getCause.isInstanceOf[RuntimeException]) - - val nullDf = Seq(("first row", None), ("second row", Some(true))).toDF("n", "cond") - checkAnswer( - nullDf.filter("cond = true").select(assert_true($"cond", $"cond")), - Row(null) :: Nil - ) - val e2 = intercept[SparkException] { - nullDf.select(assert_true($"cond", $"n")).collect() - } - assert(e2.getCause.isInstanceOf[RuntimeException]) - assert(e2.getCause.getMessage contains "first row") - - // assert_true(condition) - val intDf = Seq((0, 1)).toDF("a", "b") - checkAnswer(intDf.select(assert_true($"a" < $"b")), Row(null) :: Nil) - val e3 = intercept[SparkException] { - intDf.select(assert_true($"a" > $"b")).collect() - } - assert(e3.getCause.isInstanceOf[RuntimeException]) - assert(e3.getCause.getMessage contains "'('a > 'b)' is not true!") - } - testGluten( "input_file_name, input_file_block_start and input_file_block_length " + "should fall back if scan falls back") { diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala index 4fbd89bda05c7..4b75ce13c0678 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala @@ -17,10 +17,9 @@ package org.apache.spark.sql import org.apache.gluten.GlutenConfig -import org.apache.gluten.exception.GlutenException import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils, SystemParameters} -import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -40,7 +39,6 @@ import java.util.Locale import scala.collection.mutable.ArrayBuffer import scala.sys.process.{Process, ProcessLogger} import scala.util.Try -import scala.util.control.NonFatal /** * End-to-end test cases for SQL queries. @@ -763,45 +761,4 @@ class GlutenSQLQueryTestSuite super.afterAll() } } - - /** - * This method handles exceptions occurred during query execution as they may need special care to - * become comparable to the expected output. - * - * @param result - * a function that returns a pair of schema and output - */ - override protected def handleExceptions( - result: => (String, Seq[String])): (String, Seq[String]) = { - try { - result - } catch { - case a: AnalysisException => - // Do not output the logical plan tree which contains expression IDs. - // Also implement a crude way of masking expression IDs in the error message - // with a generic pattern "###". - val msg = if (a.plan.nonEmpty) a.getSimpleMessage else a.getMessage - (emptySchema, Seq(a.getClass.getName, msg.replaceAll("#\\d+", "#x"))) - case s: SparkException if s.getCause != null => - // For a runtime exception, it is hard to match because its message contains - // information of stage, task ID, etc. - // To make result matching simpler, here we match the cause of the exception if it exists. - s.getCause match { - case e: GlutenException => - val reasonPattern = "Reason: (.*)".r - val reason = reasonPattern.findFirstMatchIn(e.getMessage).map(_.group(1)) - - reason match { - case Some(r) => - (emptySchema, Seq(e.getClass.getName, r)) - case None => (emptySchema, Seq()) - } - case cause => - (emptySchema, Seq(cause.getClass.getName, cause.getMessage)) - } - case NonFatal(e) => - // If there is an exception, put the exception class followed by the message. - (emptySchema, Seq(e.getClass.getName, e.getMessage)) - } - } } diff --git a/gluten-ut/spark33/src/test/resources/sql-tests/inputs/misc-functions.sql b/gluten-ut/spark33/src/test/resources/sql-tests/inputs/misc-functions.sql deleted file mode 100644 index 907ff33000d8e..0000000000000 --- a/gluten-ut/spark33/src/test/resources/sql-tests/inputs/misc-functions.sql +++ /dev/null @@ -1,22 +0,0 @@ --- test for misc functions - --- typeof -select typeof(null); -select typeof(true); -select typeof(1Y), typeof(1S), typeof(1), typeof(1L); -select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2); -select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days'); -select typeof(x'ABCD'), typeof('SPARK'); -select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')); - --- Spark-32793: Rewrite AssertTrue with RaiseError -SELECT assert_true(true), assert_true(boolean(1)); -SELECT assert_true(false); -SELECT assert_true(boolean(0)); -SELECT assert_true(null); -SELECT assert_true(boolean(null)); -SELECT assert_true(false, 'custom error message'); - -CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v); -SELECT raise_error('error message'); -SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc; diff --git a/gluten-ut/spark33/src/test/resources/sql-tests/results/misc-functions.sql.out b/gluten-ut/spark33/src/test/resources/sql-tests/results/misc-functions.sql.out deleted file mode 100644 index 6985233c33187..0000000000000 --- a/gluten-ut/spark33/src/test/resources/sql-tests/results/misc-functions.sql.out +++ /dev/null @@ -1,137 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- Number of queries: 16 - - --- !query -select typeof(null) --- !query schema -struct --- !query output -void - - --- !query -select typeof(true) --- !query schema -struct --- !query output -boolean - - --- !query -select typeof(1Y), typeof(1S), typeof(1), typeof(1L) --- !query schema -struct --- !query output -tinyint smallint int bigint - - --- !query -select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2) --- !query schema -struct --- !query output -float double decimal(2,1) - - --- !query -select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days') --- !query schema -struct --- !query output -date timestamp interval day - - --- !query -select typeof(x'ABCD'), typeof('SPARK') --- !query schema -struct --- !query output -binary string - - --- !query -select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')) --- !query schema -struct --- !query output -array map struct - - --- !query -SELECT assert_true(true), assert_true(boolean(1)) --- !query schema -struct --- !query output -NULL NULL - - --- !query -SELECT assert_true(false) --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -'false' is not true! - - --- !query -SELECT assert_true(boolean(0)) --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -'cast(0 as boolean)' is not true! - - --- !query -SELECT assert_true(null) --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -'null' is not true! - - --- !query -SELECT assert_true(boolean(null)) --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -'cast(null as boolean)' is not true! - - --- !query -SELECT assert_true(false, 'custom error message') --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -custom error message - - --- !query -CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v) --- !query schema -struct<> --- !query output - - - --- !query -SELECT raise_error('error message') --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -error message - - --- !query -SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -too big: 8 diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index fcc2bd3432623..ae3e7c7b8e9d7 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -950,11 +950,6 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenFileSourceCharVarcharTestSuite] enableSuite[GlutenDSV2CharVarcharTestSuite] enableSuite[GlutenColumnExpressionSuite] - // Velox raise_error('errMsg') throws a velox_user_error exception with the message 'errMsg'. - // The final caught Spark exception's getCause().getMessage() contains 'errMsg' but does not - // equal 'errMsg' exactly. The following two tests will be skipped and overridden in Gluten. - .exclude("raise_error") - .exclude("assert_true") enableSuite[GlutenComplexTypeSuite] enableSuite[GlutenConfigBehaviorSuite] // Will be fixed by cleaning up ColumnarShuffleExchangeExec. diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala index 437cef29215ce..da22e60f932d6 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala @@ -16,60 +16,11 @@ */ package org.apache.spark.sql -import org.apache.spark.SparkException import org.apache.spark.sql.execution.ProjectExec -import org.apache.spark.sql.functions.{assert_true, expr, input_file_name, lit, raise_error} +import org.apache.spark.sql.functions.{expr, input_file_name} class GlutenColumnExpressionSuite extends ColumnExpressionSuite with GlutenSQLTestsTrait { import testImplicits._ - testGluten("raise_error") { - val strDf = Seq(("hello")).toDF("a") - - val e1 = intercept[SparkException] { - strDf.select(raise_error(lit(null.asInstanceOf[String]))).collect() - } - assert(e1.getCause.isInstanceOf[RuntimeException]) - - val e2 = intercept[SparkException] { - strDf.select(raise_error($"a")).collect() - } - assert(e2.getCause.isInstanceOf[RuntimeException]) - assert(e2.getCause.getMessage contains "hello") - } - - testGluten("assert_true") { - // assert_true(condition, errMsgCol) - val booleanDf = Seq((true), (false)).toDF("cond") - checkAnswer( - booleanDf.filter("cond = true").select(assert_true($"cond")), - Row(null) :: Nil - ) - val e1 = intercept[SparkException] { - booleanDf.select(assert_true($"cond", lit(null.asInstanceOf[String]))).collect() - } - assert(e1.getCause.isInstanceOf[RuntimeException]) - - val nullDf = Seq(("first row", None), ("second row", Some(true))).toDF("n", "cond") - checkAnswer( - nullDf.filter("cond = true").select(assert_true($"cond", $"cond")), - Row(null) :: Nil - ) - val e2 = intercept[SparkException] { - nullDf.select(assert_true($"cond", $"n")).collect() - } - assert(e2.getCause.isInstanceOf[RuntimeException]) - assert(e2.getCause.getMessage contains "first row") - - // assert_true(condition) - val intDf = Seq((0, 1)).toDF("a", "b") - checkAnswer(intDf.select(assert_true($"a" < $"b")), Row(null) :: Nil) - val e3 = intercept[SparkException] { - intDf.select(assert_true($"a" > $"b")).collect() - } - assert(e3.getCause.isInstanceOf[RuntimeException]) - assert(e3.getCause.getMessage contains "'('a > 'b)' is not true!") - } - testGluten( "input_file_name, input_file_block_start and input_file_block_length " + "should fall back if scan falls back") { diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala index 6e2a9efa87c07..4536aa54057c0 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala @@ -17,10 +17,9 @@ package org.apache.spark.sql import org.apache.gluten.GlutenConfig -import org.apache.gluten.exception.GlutenException import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils, SystemParameters} -import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -40,7 +39,6 @@ import java.util.Locale import scala.collection.mutable.ArrayBuffer import scala.sys.process.{Process, ProcessLogger} import scala.util.Try -import scala.util.control.NonFatal /** * End-to-end test cases for SQL queries. @@ -763,45 +761,4 @@ class GlutenSQLQueryTestSuite super.afterAll() } } - - /** - * This method handles exceptions occurred during query execution as they may need special care to - * become comparable to the expected output. - * - * @param result - * a function that returns a pair of schema and output - */ - override protected def handleExceptions( - result: => (String, Seq[String])): (String, Seq[String]) = { - try { - result - } catch { - case a: AnalysisException => - // Do not output the logical plan tree which contains expression IDs. - // Also implement a crude way of masking expression IDs in the error message - // with a generic pattern "###". - val msg = if (a.plan.nonEmpty) a.getSimpleMessage else a.getMessage - (emptySchema, Seq(a.getClass.getName, msg.replaceAll("#\\d+", "#x"))) - case s: SparkException if s.getCause != null => - // For a runtime exception, it is hard to match because its message contains - // information of stage, task ID, etc. - // To make result matching simpler, here we match the cause of the exception if it exists. - val cause = s.getCause - cause match { - case e: GlutenException => - val reasonPattern = "Reason: (.*)".r - val reason = reasonPattern.findFirstMatchIn(e.getMessage).map(_.group(1)) - - reason match { - case Some(r) => - (emptySchema, Seq(e.getClass.getName, r)) - case None => (emptySchema, Seq()) - } - case _ => (emptySchema, Seq(cause.getClass.getName, cause.getMessage)) - } - case NonFatal(e) => - // If there is an exception, put the exception class followed by the message. - (emptySchema, Seq(e.getClass.getName, e.getMessage)) - } - } } diff --git a/gluten-ut/spark34/src/test/resources/sql-tests/inputs/misc-functions.sql b/gluten-ut/spark34/src/test/resources/sql-tests/inputs/misc-functions.sql deleted file mode 100644 index 907ff33000d8e..0000000000000 --- a/gluten-ut/spark34/src/test/resources/sql-tests/inputs/misc-functions.sql +++ /dev/null @@ -1,22 +0,0 @@ --- test for misc functions - --- typeof -select typeof(null); -select typeof(true); -select typeof(1Y), typeof(1S), typeof(1), typeof(1L); -select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2); -select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days'); -select typeof(x'ABCD'), typeof('SPARK'); -select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')); - --- Spark-32793: Rewrite AssertTrue with RaiseError -SELECT assert_true(true), assert_true(boolean(1)); -SELECT assert_true(false); -SELECT assert_true(boolean(0)); -SELECT assert_true(null); -SELECT assert_true(boolean(null)); -SELECT assert_true(false, 'custom error message'); - -CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v); -SELECT raise_error('error message'); -SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc; diff --git a/gluten-ut/spark34/src/test/resources/sql-tests/results/misc-functions.sql.out b/gluten-ut/spark34/src/test/resources/sql-tests/results/misc-functions.sql.out deleted file mode 100644 index d6d1289a5adb7..0000000000000 --- a/gluten-ut/spark34/src/test/resources/sql-tests/results/misc-functions.sql.out +++ /dev/null @@ -1,134 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- !query -select typeof(null) --- !query schema -struct --- !query output -void - - --- !query -select typeof(true) --- !query schema -struct --- !query output -boolean - - --- !query -select typeof(1Y), typeof(1S), typeof(1), typeof(1L) --- !query schema -struct --- !query output -tinyint smallint int bigint - - --- !query -select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2) --- !query schema -struct --- !query output -float double decimal(2,1) - - --- !query -select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days') --- !query schema -struct --- !query output -date timestamp interval day - - --- !query -select typeof(x'ABCD'), typeof('SPARK') --- !query schema -struct --- !query output -binary string - - --- !query -select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')) --- !query schema -struct --- !query output -array map struct - - --- !query -SELECT assert_true(true), assert_true(boolean(1)) --- !query schema -struct --- !query output -NULL NULL - - --- !query -SELECT assert_true(false) --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -'false' is not true! - - --- !query -SELECT assert_true(boolean(0)) --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -'cast(0 as boolean)' is not true! - - --- !query -SELECT assert_true(null) --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -'null' is not true! - - --- !query -SELECT assert_true(boolean(null)) --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -'cast(null as boolean)' is not true! - - --- !query -SELECT assert_true(false, 'custom error message') --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -custom error message - - --- !query -CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v) --- !query schema -struct<> --- !query output - - - --- !query -SELECT raise_error('error message') --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -error message - - --- !query -SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -too big: 8 diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 57346f493945b..0da19922ffdaf 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -955,11 +955,6 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenFileSourceCharVarcharTestSuite] enableSuite[GlutenDSV2CharVarcharTestSuite] enableSuite[GlutenColumnExpressionSuite] - // Velox raise_error('errMsg') throws a velox_user_error exception with the message 'errMsg'. - // The final caught Spark exception's getCause().getMessage() contains 'errMsg' but does not - // equal 'errMsg' exactly. The following two tests will be skipped and overridden in Gluten. - .exclude("raise_error") - .exclude("assert_true") enableSuite[GlutenComplexTypeSuite] enableSuite[GlutenConfigBehaviorSuite] // Will be fixed by cleaning up ColumnarShuffleExchangeExec. diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala index 437cef29215ce..da22e60f932d6 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala @@ -16,60 +16,11 @@ */ package org.apache.spark.sql -import org.apache.spark.SparkException import org.apache.spark.sql.execution.ProjectExec -import org.apache.spark.sql.functions.{assert_true, expr, input_file_name, lit, raise_error} +import org.apache.spark.sql.functions.{expr, input_file_name} class GlutenColumnExpressionSuite extends ColumnExpressionSuite with GlutenSQLTestsTrait { import testImplicits._ - testGluten("raise_error") { - val strDf = Seq(("hello")).toDF("a") - - val e1 = intercept[SparkException] { - strDf.select(raise_error(lit(null.asInstanceOf[String]))).collect() - } - assert(e1.getCause.isInstanceOf[RuntimeException]) - - val e2 = intercept[SparkException] { - strDf.select(raise_error($"a")).collect() - } - assert(e2.getCause.isInstanceOf[RuntimeException]) - assert(e2.getCause.getMessage contains "hello") - } - - testGluten("assert_true") { - // assert_true(condition, errMsgCol) - val booleanDf = Seq((true), (false)).toDF("cond") - checkAnswer( - booleanDf.filter("cond = true").select(assert_true($"cond")), - Row(null) :: Nil - ) - val e1 = intercept[SparkException] { - booleanDf.select(assert_true($"cond", lit(null.asInstanceOf[String]))).collect() - } - assert(e1.getCause.isInstanceOf[RuntimeException]) - - val nullDf = Seq(("first row", None), ("second row", Some(true))).toDF("n", "cond") - checkAnswer( - nullDf.filter("cond = true").select(assert_true($"cond", $"cond")), - Row(null) :: Nil - ) - val e2 = intercept[SparkException] { - nullDf.select(assert_true($"cond", $"n")).collect() - } - assert(e2.getCause.isInstanceOf[RuntimeException]) - assert(e2.getCause.getMessage contains "first row") - - // assert_true(condition) - val intDf = Seq((0, 1)).toDF("a", "b") - checkAnswer(intDf.select(assert_true($"a" < $"b")), Row(null) :: Nil) - val e3 = intercept[SparkException] { - intDf.select(assert_true($"a" > $"b")).collect() - } - assert(e3.getCause.isInstanceOf[RuntimeException]) - assert(e3.getCause.getMessage contains "'('a > 'b)' is not true!") - } - testGluten( "input_file_name, input_file_block_start and input_file_block_length " + "should fall back if scan falls back") { diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala index 8a291990ea31f..0ea1f13ec2efa 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala @@ -17,12 +17,9 @@ package org.apache.spark.sql import org.apache.gluten.GlutenConfig -import org.apache.gluten.exception.GlutenException import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils, SystemParameters} -import org.apache.spark.{SparkConf, SparkException, SparkThrowable} -import org.apache.spark.ErrorMessageFormat.MINIMAL -import org.apache.spark.SparkThrowableHelper.getMessage +import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -43,7 +40,6 @@ import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.sys.process.{Process, ProcessLogger} import scala.util.Try -import scala.util.control.NonFatal /** * End-to-end test cases for SQL queries. @@ -785,50 +781,4 @@ class GlutenSQLQueryTestSuite super.afterAll() } } - - /** - * This method handles exceptions occurred during query execution as they may need special care to - * become comparable to the expected output. - * - * @param result - * a function that returns a pair of schema and output - */ - override protected def handleExceptions( - result: => (String, Seq[String])): (String, Seq[String]) = { - val format = MINIMAL - try { - result - } catch { - case e: SparkThrowable with Throwable if e.getErrorClass != null => - (emptySchema, Seq(e.getClass.getName, getMessage(e, format))) - case a: AnalysisException => - // Do not output the logical plan tree which contains expression IDs. - // Also implement a crude way of masking expression IDs in the error message - // with a generic pattern "###". - (emptySchema, Seq(a.getClass.getName, a.getSimpleMessage.replaceAll("#\\d+", "#x"))) - case s: SparkException if s.getCause != null => - // For a runtime exception, it is hard to match because its message contains - // information of stage, task ID, etc. - // To make result matching simpler, here we match the cause of the exception if it exists. - s.getCause match { - case e: SparkThrowable with Throwable if e.getErrorClass != null => - (emptySchema, Seq(e.getClass.getName, getMessage(e, format))) - case e: GlutenException => - val reasonPattern = "Reason: (.*)".r - val reason = reasonPattern.findFirstMatchIn(e.getMessage).map(_.group(1)) - - reason match { - case Some(r) => - (emptySchema, Seq(e.getClass.getName, r)) - case None => (emptySchema, Seq()) - } - - case cause => - (emptySchema, Seq(cause.getClass.getName, cause.getMessage)) - } - case NonFatal(e) => - // If there is an exception, put the exception class followed by the message. - (emptySchema, Seq(e.getClass.getName, e.getMessage)) - } - } } diff --git a/gluten-ut/spark35/src/test/resources/sql-tests/inputs/misc-functions.sql b/gluten-ut/spark35/src/test/resources/sql-tests/inputs/misc-functions.sql deleted file mode 100644 index 907ff33000d8e..0000000000000 --- a/gluten-ut/spark35/src/test/resources/sql-tests/inputs/misc-functions.sql +++ /dev/null @@ -1,22 +0,0 @@ --- test for misc functions - --- typeof -select typeof(null); -select typeof(true); -select typeof(1Y), typeof(1S), typeof(1), typeof(1L); -select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2); -select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days'); -select typeof(x'ABCD'), typeof('SPARK'); -select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')); - --- Spark-32793: Rewrite AssertTrue with RaiseError -SELECT assert_true(true), assert_true(boolean(1)); -SELECT assert_true(false); -SELECT assert_true(boolean(0)); -SELECT assert_true(null); -SELECT assert_true(boolean(null)); -SELECT assert_true(false, 'custom error message'); - -CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v); -SELECT raise_error('error message'); -SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc; diff --git a/gluten-ut/spark35/src/test/resources/sql-tests/results/misc-functions.sql.out b/gluten-ut/spark35/src/test/resources/sql-tests/results/misc-functions.sql.out deleted file mode 100644 index d6d1289a5adb7..0000000000000 --- a/gluten-ut/spark35/src/test/resources/sql-tests/results/misc-functions.sql.out +++ /dev/null @@ -1,134 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- !query -select typeof(null) --- !query schema -struct --- !query output -void - - --- !query -select typeof(true) --- !query schema -struct --- !query output -boolean - - --- !query -select typeof(1Y), typeof(1S), typeof(1), typeof(1L) --- !query schema -struct --- !query output -tinyint smallint int bigint - - --- !query -select typeof(cast(1.0 as float)), typeof(1.0D), typeof(1.2) --- !query schema -struct --- !query output -float double decimal(2,1) - - --- !query -select typeof(date '1986-05-23'), typeof(timestamp '1986-05-23'), typeof(interval '23 days') --- !query schema -struct --- !query output -date timestamp interval day - - --- !query -select typeof(x'ABCD'), typeof('SPARK') --- !query schema -struct --- !query output -binary string - - --- !query -select typeof(array(1, 2)), typeof(map(1, 2)), typeof(named_struct('a', 1, 'b', 'spark')) --- !query schema -struct --- !query output -array map struct - - --- !query -SELECT assert_true(true), assert_true(boolean(1)) --- !query schema -struct --- !query output -NULL NULL - - --- !query -SELECT assert_true(false) --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -'false' is not true! - - --- !query -SELECT assert_true(boolean(0)) --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -'cast(0 as boolean)' is not true! - - --- !query -SELECT assert_true(null) --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -'null' is not true! - - --- !query -SELECT assert_true(boolean(null)) --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -'cast(null as boolean)' is not true! - - --- !query -SELECT assert_true(false, 'custom error message') --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -custom error message - - --- !query -CREATE TEMPORARY VIEW tbl_misc AS SELECT * FROM (VALUES (1), (8), (2)) AS T(v) --- !query schema -struct<> --- !query output - - - --- !query -SELECT raise_error('error message') --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -error message - - --- !query -SELECT if(v > 5, raise_error('too big: ' || v), v + 1) FROM tbl_misc --- !query schema -struct<> --- !query output -org.apache.gluten.exception.GlutenException -too big: 8 diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 9716a7c143742..e54aca34ec757 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -969,11 +969,6 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenFileSourceCharVarcharTestSuite] enableSuite[GlutenDSV2CharVarcharTestSuite] enableSuite[GlutenColumnExpressionSuite] - // Velox raise_error('errMsg') throws a velox_user_error exception with the message 'errMsg'. - // The final caught Spark exception's getCause().getMessage() contains 'errMsg' but does not - // equal 'errMsg' exactly. The following two tests will be skipped and overridden in Gluten. - .exclude("raise_error") - .exclude("assert_true") enableSuite[GlutenComplexTypeSuite] enableSuite[GlutenConfigBehaviorSuite] // Will be fixed by cleaning up ColumnarShuffleExchangeExec. diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala index 437cef29215ce..da22e60f932d6 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala @@ -16,60 +16,11 @@ */ package org.apache.spark.sql -import org.apache.spark.SparkException import org.apache.spark.sql.execution.ProjectExec -import org.apache.spark.sql.functions.{assert_true, expr, input_file_name, lit, raise_error} +import org.apache.spark.sql.functions.{expr, input_file_name} class GlutenColumnExpressionSuite extends ColumnExpressionSuite with GlutenSQLTestsTrait { import testImplicits._ - testGluten("raise_error") { - val strDf = Seq(("hello")).toDF("a") - - val e1 = intercept[SparkException] { - strDf.select(raise_error(lit(null.asInstanceOf[String]))).collect() - } - assert(e1.getCause.isInstanceOf[RuntimeException]) - - val e2 = intercept[SparkException] { - strDf.select(raise_error($"a")).collect() - } - assert(e2.getCause.isInstanceOf[RuntimeException]) - assert(e2.getCause.getMessage contains "hello") - } - - testGluten("assert_true") { - // assert_true(condition, errMsgCol) - val booleanDf = Seq((true), (false)).toDF("cond") - checkAnswer( - booleanDf.filter("cond = true").select(assert_true($"cond")), - Row(null) :: Nil - ) - val e1 = intercept[SparkException] { - booleanDf.select(assert_true($"cond", lit(null.asInstanceOf[String]))).collect() - } - assert(e1.getCause.isInstanceOf[RuntimeException]) - - val nullDf = Seq(("first row", None), ("second row", Some(true))).toDF("n", "cond") - checkAnswer( - nullDf.filter("cond = true").select(assert_true($"cond", $"cond")), - Row(null) :: Nil - ) - val e2 = intercept[SparkException] { - nullDf.select(assert_true($"cond", $"n")).collect() - } - assert(e2.getCause.isInstanceOf[RuntimeException]) - assert(e2.getCause.getMessage contains "first row") - - // assert_true(condition) - val intDf = Seq((0, 1)).toDF("a", "b") - checkAnswer(intDf.select(assert_true($"a" < $"b")), Row(null) :: Nil) - val e3 = intercept[SparkException] { - intDf.select(assert_true($"a" > $"b")).collect() - } - assert(e3.getCause.isInstanceOf[RuntimeException]) - assert(e3.getCause.getMessage contains "'('a > 'b)' is not true!") - } - testGluten( "input_file_name, input_file_block_start and input_file_block_length " + "should fall back if scan falls back") { diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala index 8a6f5f32f8919..b1f3945bf1920 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala @@ -17,12 +17,9 @@ package org.apache.spark.sql import org.apache.gluten.GlutenConfig -import org.apache.gluten.exception.GlutenException import org.apache.gluten.utils.{BackendTestSettings, BackendTestUtils, SystemParameters} -import org.apache.spark.{SparkConf, SparkException, SparkThrowable} -import org.apache.spark.ErrorMessageFormat.MINIMAL -import org.apache.spark.SparkThrowableHelper.getMessage +import org.apache.spark.SparkConf import org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.catalyst.rules.RuleExecutor @@ -43,7 +40,6 @@ import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import scala.sys.process.{Process, ProcessLogger} import scala.util.Try -import scala.util.control.NonFatal /** * End-to-end test cases for SQL queries. @@ -787,50 +783,4 @@ class GlutenSQLQueryTestSuite super.afterAll() } } - - /** - * This method handles exceptions occurred during query execution as they may need special care to - * become comparable to the expected output. - * - * @param result - * a function that returns a pair of schema and output - */ - override protected def handleExceptions( - result: => (String, Seq[String])): (String, Seq[String]) = { - val format = MINIMAL - try { - result - } catch { - case e: SparkThrowable with Throwable if e.getErrorClass != null => - (emptySchema, Seq(e.getClass.getName, getMessage(e, format))) - case a: AnalysisException => - // Do not output the logical plan tree which contains expression IDs. - // Also implement a crude way of masking expression IDs in the error message - // with a generic pattern "###". - (emptySchema, Seq(a.getClass.getName, a.getSimpleMessage.replaceAll("#\\d+", "#x"))) - case s: SparkException if s.getCause != null => - // For a runtime exception, it is hard to match because its message contains - // information of stage, task ID, etc. - // To make result matching simpler, here we match the cause of the exception if it exists. - s.getCause match { - case e: SparkThrowable with Throwable if e.getErrorClass != null => - (emptySchema, Seq(e.getClass.getName, getMessage(e, format))) - case e: GlutenException => - val reasonPattern = "Reason: (.*)".r - val reason = reasonPattern.findFirstMatchIn(e.getMessage).map(_.group(1)) - - reason match { - case Some(r) => - (emptySchema, Seq(e.getClass.getName, r)) - case None => (emptySchema, Seq()) - } - - case cause => - (emptySchema, Seq(cause.getClass.getName, cause.getMessage)) - } - case NonFatal(e) => - // If there is an exception, put the exception class followed by the message. - (emptySchema, Seq(e.getClass.getName, e.getMessage)) - } - } } diff --git a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala index 41bc86749a8d9..0b31ec346705b 100644 --- a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala +++ b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala @@ -316,7 +316,6 @@ object ExpressionNames { final val MONOTONICALLY_INCREASING_ID = "monotonically_increasing_id" final val WIDTH_BUCKET = "width_bucket" final val REPLICATE_ROWS = "replicaterows" - final val RAISE_ERROR = "raise_error" // Directly use child expression transformer final val KNOWN_NULLABLE = "known_nullable"