From 369170eba48a61567ef8387b87e96f2b26682878 Mon Sep 17 00:00:00 2001 From: gaoyangxiaozhu Date: Sat, 6 Jul 2024 18:16:04 +0800 Subject: [PATCH] Override raise_error and assert_true tests in GlutenColumnExpressionSuite for Velox error messages --- .../utils/velox/VeloxTestSettings.scala | 5 ++ .../sql/GlutenColumnExpressionSuite.scala | 52 ++++++++++++++++++- .../utils/velox/VeloxTestSettings.scala | 5 ++ .../sql/GlutenColumnExpressionSuite.scala | 52 ++++++++++++++++++- .../utils/velox/VeloxTestSettings.scala | 5 ++ .../sql/GlutenColumnExpressionSuite.scala | 52 ++++++++++++++++++- .../utils/velox/VeloxTestSettings.scala | 5 ++ .../sql/GlutenColumnExpressionSuite.scala | 52 ++++++++++++++++++- 8 files changed, 224 insertions(+), 4 deletions(-) diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index a17f72de3121..d5e8df63869e 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -296,6 +296,11 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("sliding range between with aggregation") .exclude("store and retrieve column stats in different time zones") enableSuite[GlutenColumnExpressionSuite] + // Velox raise_error('errMsg') throws a velox_user_error exception with the message 'errMsg'. + // The final caught Spark exception's getCause().getMessage() contains 'errMsg' but does not + // equal 'errMsg' exactly. The following two tests will be skipped and overridden in Gluten. 
+ .exclude("raise_error") + .exclude("assert_true") enableSuite[GlutenDataFrameImplicitsSuite] enableSuite[GlutenGeneratorFunctionSuite] enableSuite[GlutenDataFrameTimeWindowingSuite] diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala index a4b530e637af..ccc59163a7ec 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala @@ -16,11 +16,61 @@ */ package org.apache.spark.sql +import org.apache.spark.SparkException import org.apache.spark.sql.execution.ProjectExec -import org.apache.spark.sql.functions.{expr, input_file_name} +import org.apache.spark.sql.functions.{assert_true, expr, input_file_name, lit, raise_error} import org.apache.spark.sql.types.{ArrayType, IntegerType, StringType, StructField, StructType} class GlutenColumnExpressionSuite extends ColumnExpressionSuite with GlutenSQLTestsTrait { + import testImplicits._ + testGluten("raise_error") { + val strDf = Seq(("hello")).toDF("a") + + val e1 = intercept[SparkException] { + strDf.select(raise_error(lit(null.asInstanceOf[String]))).collect() + } + assert(e1.getCause.isInstanceOf[RuntimeException]) + + val e2 = intercept[SparkException] { + strDf.select(raise_error($"a")).collect() + } + assert(e2.getCause.isInstanceOf[RuntimeException]) + assert(e2.getCause.getMessage contains "hello") + } + + testGluten("assert_true") { + // assert_true(condition, errMsgCol) + val booleanDf = Seq((true), (false)).toDF("cond") + checkAnswer( + booleanDf.filter("cond = true").select(assert_true($"cond")), + Row(null) :: Nil + ) + val e1 = intercept[SparkException] { + booleanDf.select(assert_true($"cond", lit(null.asInstanceOf[String]))).collect() + } + assert(e1.getCause.isInstanceOf[RuntimeException]) + + val nullDf = Seq(("first row", None), 
("second row", Some(true))).toDF("n", "cond") + checkAnswer( + nullDf.filter("cond = true").select(assert_true($"cond", $"cond")), + Row(null) :: Nil + ) + val e2 = intercept[SparkException] { + nullDf.select(assert_true($"cond", $"n")).collect() + } + assert(e2.getCause.isInstanceOf[RuntimeException]) + assert(e2.getCause.getMessage contains "first row") + + // assert_true(condition) + val intDf = Seq((0, 1)).toDF("a", "b") + checkAnswer(intDf.select(assert_true($"a" < $"b")), Row(null) :: Nil) + val e3 = intercept[SparkException] { + intDf.select(assert_true($"a" > $"b")).collect() + } + assert(e3.getCause.isInstanceOf[RuntimeException]) + assert(e3.getCause.getMessage contains "'('a > 'b)' is not true!") + } + testGluten("input_file_name with scan is fallback") { withTempPath { dir => diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index ae3e7c7b8e9d..fcc2bd343262 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -950,6 +950,11 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenFileSourceCharVarcharTestSuite] enableSuite[GlutenDSV2CharVarcharTestSuite] enableSuite[GlutenColumnExpressionSuite] + // Velox raise_error('errMsg') throws a velox_user_error exception with the message 'errMsg'. + // The final caught Spark exception's getCause().getMessage() contains 'errMsg' but does not + // equal 'errMsg' exactly. The following two tests will be skipped and overridden in Gluten. + .exclude("raise_error") + .exclude("assert_true") enableSuite[GlutenComplexTypeSuite] enableSuite[GlutenConfigBehaviorSuite] // Will be fixed by cleaning up ColumnarShuffleExchangeExec. 
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala index a4b530e637af..ccc59163a7ec 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala @@ -16,11 +16,61 @@ */ package org.apache.spark.sql +import org.apache.spark.SparkException import org.apache.spark.sql.execution.ProjectExec -import org.apache.spark.sql.functions.{expr, input_file_name} +import org.apache.spark.sql.functions.{assert_true, expr, input_file_name, lit, raise_error} import org.apache.spark.sql.types.{ArrayType, IntegerType, StringType, StructField, StructType} class GlutenColumnExpressionSuite extends ColumnExpressionSuite with GlutenSQLTestsTrait { + import testImplicits._ + testGluten("raise_error") { + val strDf = Seq(("hello")).toDF("a") + + val e1 = intercept[SparkException] { + strDf.select(raise_error(lit(null.asInstanceOf[String]))).collect() + } + assert(e1.getCause.isInstanceOf[RuntimeException]) + + val e2 = intercept[SparkException] { + strDf.select(raise_error($"a")).collect() + } + assert(e2.getCause.isInstanceOf[RuntimeException]) + assert(e2.getCause.getMessage contains "hello") + } + + testGluten("assert_true") { + // assert_true(condition, errMsgCol) + val booleanDf = Seq((true), (false)).toDF("cond") + checkAnswer( + booleanDf.filter("cond = true").select(assert_true($"cond")), + Row(null) :: Nil + ) + val e1 = intercept[SparkException] { + booleanDf.select(assert_true($"cond", lit(null.asInstanceOf[String]))).collect() + } + assert(e1.getCause.isInstanceOf[RuntimeException]) + + val nullDf = Seq(("first row", None), ("second row", Some(true))).toDF("n", "cond") + checkAnswer( + nullDf.filter("cond = true").select(assert_true($"cond", $"cond")), + Row(null) :: Nil + ) + val e2 = intercept[SparkException] 
{ + nullDf.select(assert_true($"cond", $"n")).collect() + } + assert(e2.getCause.isInstanceOf[RuntimeException]) + assert(e2.getCause.getMessage contains "first row") + + // assert_true(condition) + val intDf = Seq((0, 1)).toDF("a", "b") + checkAnswer(intDf.select(assert_true($"a" < $"b")), Row(null) :: Nil) + val e3 = intercept[SparkException] { + intDf.select(assert_true($"a" > $"b")).collect() + } + assert(e3.getCause.isInstanceOf[RuntimeException]) + assert(e3.getCause.getMessage contains "'('a > 'b)' is not true!") + } + testGluten("input_file_name with scan is fallback") { withTempPath { dir => diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 0da19922ffda..57346f493945 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -955,6 +955,11 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenFileSourceCharVarcharTestSuite] enableSuite[GlutenDSV2CharVarcharTestSuite] enableSuite[GlutenColumnExpressionSuite] + // Velox raise_error('errMsg') throws a velox_user_error exception with the message 'errMsg'. + // The final caught Spark exception's getCause().getMessage() contains 'errMsg' but does not + // equal 'errMsg' exactly. The following two tests will be skipped and overridden in Gluten. + .exclude("raise_error") + .exclude("assert_true") enableSuite[GlutenComplexTypeSuite] enableSuite[GlutenConfigBehaviorSuite] // Will be fixed by cleaning up ColumnarShuffleExchangeExec. 
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala index a4b530e637af..ccc59163a7ec 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala @@ -16,11 +16,61 @@ */ package org.apache.spark.sql +import org.apache.spark.SparkException import org.apache.spark.sql.execution.ProjectExec -import org.apache.spark.sql.functions.{expr, input_file_name} +import org.apache.spark.sql.functions.{assert_true, expr, input_file_name, lit, raise_error} import org.apache.spark.sql.types.{ArrayType, IntegerType, StringType, StructField, StructType} class GlutenColumnExpressionSuite extends ColumnExpressionSuite with GlutenSQLTestsTrait { + import testImplicits._ + testGluten("raise_error") { + val strDf = Seq(("hello")).toDF("a") + + val e1 = intercept[SparkException] { + strDf.select(raise_error(lit(null.asInstanceOf[String]))).collect() + } + assert(e1.getCause.isInstanceOf[RuntimeException]) + + val e2 = intercept[SparkException] { + strDf.select(raise_error($"a")).collect() + } + assert(e2.getCause.isInstanceOf[RuntimeException]) + assert(e2.getCause.getMessage contains "hello") + } + + testGluten("assert_true") { + // assert_true(condition, errMsgCol) + val booleanDf = Seq((true), (false)).toDF("cond") + checkAnswer( + booleanDf.filter("cond = true").select(assert_true($"cond")), + Row(null) :: Nil + ) + val e1 = intercept[SparkException] { + booleanDf.select(assert_true($"cond", lit(null.asInstanceOf[String]))).collect() + } + assert(e1.getCause.isInstanceOf[RuntimeException]) + + val nullDf = Seq(("first row", None), ("second row", Some(true))).toDF("n", "cond") + checkAnswer( + nullDf.filter("cond = true").select(assert_true($"cond", $"cond")), + Row(null) :: Nil + ) + val e2 = intercept[SparkException] 
{ + nullDf.select(assert_true($"cond", $"n")).collect() + } + assert(e2.getCause.isInstanceOf[RuntimeException]) + assert(e2.getCause.getMessage contains "first row") + + // assert_true(condition) + val intDf = Seq((0, 1)).toDF("a", "b") + checkAnswer(intDf.select(assert_true($"a" < $"b")), Row(null) :: Nil) + val e3 = intercept[SparkException] { + intDf.select(assert_true($"a" > $"b")).collect() + } + assert(e3.getCause.isInstanceOf[RuntimeException]) + assert(e3.getCause.getMessage contains "'('a > 'b)' is not true!") + } + testGluten("input_file_name with scan is fallback") { withTempPath { dir => diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index e54aca34ec75..9716a7c14374 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -969,6 +969,11 @@ class VeloxTestSettings extends BackendTestSettings { enableSuite[GlutenFileSourceCharVarcharTestSuite] enableSuite[GlutenDSV2CharVarcharTestSuite] enableSuite[GlutenColumnExpressionSuite] + // Velox raise_error('errMsg') throws a velox_user_error exception with the message 'errMsg'. + // The final caught Spark exception's getCause().getMessage() contains 'errMsg' but does not + // equal 'errMsg' exactly. The following two tests will be skipped and overridden in Gluten. + .exclude("raise_error") + .exclude("assert_true") enableSuite[GlutenComplexTypeSuite] enableSuite[GlutenConfigBehaviorSuite] // Will be fixed by cleaning up ColumnarShuffleExchangeExec. 
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala index 8a28c4e98a26..d5a44f3620e1 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenColumnExpressionSuite.scala @@ -16,11 +16,61 @@ */ package org.apache.spark.sql +import org.apache.spark.SparkException import org.apache.spark.sql.execution.ProjectExec -import org.apache.spark.sql.functions.{expr, input_file_name} +import org.apache.spark.sql.functions.{assert_true, expr, input_file_name, lit, raise_error} import org.apache.spark.sql.types._ class GlutenColumnExpressionSuite extends ColumnExpressionSuite with GlutenSQLTestsTrait { + import testImplicits._ + testGluten("raise_error") { + val strDf = Seq(("hello")).toDF("a") + + val e1 = intercept[SparkException] { + strDf.select(raise_error(lit(null.asInstanceOf[String]))).collect() + } + assert(e1.getCause.isInstanceOf[RuntimeException]) + + val e2 = intercept[SparkException] { + strDf.select(raise_error($"a")).collect() + } + assert(e2.getCause.isInstanceOf[RuntimeException]) + assert(e2.getCause.getMessage contains "hello") + } + + testGluten("assert_true") { + // assert_true(condition, errMsgCol) + val booleanDf = Seq((true), (false)).toDF("cond") + checkAnswer( + booleanDf.filter("cond = true").select(assert_true($"cond")), + Row(null) :: Nil + ) + val e1 = intercept[SparkException] { + booleanDf.select(assert_true($"cond", lit(null.asInstanceOf[String]))).collect() + } + assert(e1.getCause.isInstanceOf[RuntimeException]) + + val nullDf = Seq(("first row", None), ("second row", Some(true))).toDF("n", "cond") + checkAnswer( + nullDf.filter("cond = true").select(assert_true($"cond", $"cond")), + Row(null) :: Nil + ) + val e2 = intercept[SparkException] { + nullDf.select(assert_true($"cond", $"n")).collect() + } 
+ assert(e2.getCause.isInstanceOf[RuntimeException]) + assert(e2.getCause.getMessage contains "first row") + + // assert_true(condition) + val intDf = Seq((0, 1)).toDF("a", "b") + checkAnswer(intDf.select(assert_true($"a" < $"b")), Row(null) :: Nil) + val e3 = intercept[SparkException] { + intDf.select(assert_true($"a" > $"b")).collect() + } + assert(e3.getCause.isInstanceOf[RuntimeException]) + assert(e3.getCause.getMessage contains "'('a > 'b)' is not true!") + } + testGluten("input_file_name with scan is fallback") { withTempPath { dir =>