From 178430f4ab8ece6966d367a4e0292e5d1381ff7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=AB=98=E9=98=B3=E9=98=B3?= Date: Wed, 17 Jul 2024 17:29:31 +0800 Subject: [PATCH] [Core] Support spark version function (#6469) --- .../apache/gluten/backendsapi/velox/VeloxBackend.scala | 5 +++-- .../execution/ScalarFunctionsValidateSuite.scala | 10 ++++++++++ .../apache/gluten/expression/ExpressionConverter.scala | 3 +++ .../apache/gluten/expression/ExpressionMappings.scala | 1 + .../org/apache/gluten/expression/ExpressionNames.scala | 1 + 5 files changed, 18 insertions(+), 2 deletions(-) diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala index e4efa22055fc..b0692816af8c 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxBackend.scala @@ -27,7 +27,7 @@ import org.apache.gluten.substrait.rel.LocalFilesNode.ReadFileFormat import org.apache.gluten.substrait.rel.LocalFilesNode.ReadFileFormat.{DwrfReadFormat, OrcReadFormat, ParquetReadFormat} import org.apache.spark.sql.catalyst.catalog.BucketSpec -import org.apache.spark.sql.catalyst.expressions.{Alias, CumeDist, DenseRank, Descending, EulerNumber, Expression, Lag, Lead, Literal, MakeYMInterval, NamedExpression, NthValue, NTile, PercentRank, Pi, Rand, RangeFrame, Rank, RowNumber, SortOrder, SparkPartitionID, SpecialFrameBoundary, SpecifiedWindowFrame, Uuid} +import org.apache.spark.sql.catalyst.expressions.{Alias, CumeDist, DenseRank, Descending, EulerNumber, Expression, Lag, Lead, Literal, MakeYMInterval, NamedExpression, NthValue, NTile, PercentRank, Pi, Rand, RangeFrame, Rank, RowNumber, SortOrder, SparkPartitionID, SparkVersion, SpecialFrameBoundary, SpecifiedWindowFrame, Uuid} import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, ApproximatePercentile, Count, Sum} import org.apache.spark.sql.catalyst.plans.{JoinType, LeftOuter, RightOuter} import org.apache.spark.sql.catalyst.util.CharVarcharUtils @@ -430,7 +430,8 @@ object VeloxBackendSettings extends BackendSettingsApi { expr match { // Block directly falling back the below functions by FallbackEmptySchemaRelation. case alias: Alias => checkExpr(alias.child) - case _: Rand | _: Uuid | _: MakeYMInterval | _: SparkPartitionID | _: EulerNumber | _: Pi => + case _: Rand | _: Uuid | _: MakeYMInterval | _: SparkPartitionID | _: EulerNumber | _: Pi | + _: SparkVersion => true case _ => false } diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index 0979df72b33b..12fa3b46d72a 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -716,6 +716,16 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest { } } + test("Test version function") { + runQueryAndCompare("""SELECT version() from lineitem limit 100""".stripMargin) { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + runQueryAndCompare("""SELECT version(), l_orderkey + | from lineitem limit 100""".stripMargin) { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + } + test("Test spark_partition_id function") { runQueryAndCompare("""SELECT spark_partition_id(), l_orderkey | from lineitem limit 100""".stripMargin) { diff --git a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala index de007167f87f..3ca66b51897b 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala @@ -22,6 +22,7 @@ import org.apache.gluten.sql.shims.SparkShimLoader import org.apache.gluten.test.TestStats import org.apache.gluten.utils.DecimalArithmeticUtil +import org.apache.spark.{SPARK_REVISION, SPARK_VERSION_SHORT} import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.SQLConfHelper import org.apache.spark.sql.catalyst.expressions._ @@ -686,6 +687,8 @@ object ExpressionConverter extends SQLConfHelper with Logging { LiteralTransformer(Literal(Math.E)) case p: Pi => LiteralTransformer(Literal(Math.PI)) + case v: SparkVersion => + LiteralTransformer(SPARK_VERSION_SHORT + " " + SPARK_REVISION) case dateAdd: DateAdd => BackendsApiManager.getSparkPlanExecApiInstance.genDateAddTransformer( attributeSeq, diff --git a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala index 9012ca8048d6..ebf0c5139245 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala @@ -288,6 +288,7 @@ object ExpressionMappings { Sig[WidthBucket](WIDTH_BUCKET), Sig[ReplicateRows](REPLICATE_ROWS), Sig[RaiseError](RAISE_ERROR), + Sig[SparkVersion](VERSION), // Decimal Sig[UnscaledValue](UNSCALED_VALUE), // Generator function diff --git a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala index 41bc86749a8d..7cc75405bad6 100644 --- a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala +++ b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala @@ -317,6 +317,7 @@ object ExpressionNames { final val WIDTH_BUCKET = "width_bucket" final val REPLICATE_ROWS = "replicaterows" final val RAISE_ERROR = "raise_error" + final val VERSION = "version" // Directly use child expression transformer final val KNOWN_NULLABLE = "known_nullable"