From 9e2ec55e9eecc911945929ca3134e78913c5fd63 Mon Sep 17 00:00:00 2001 From: Zhen Li <10524738+zhli1142015@users.noreply.github.com> Date: Mon, 6 May 2024 15:21:05 +0800 Subject: [PATCH] [VL] Enable arrays_zip function (#5609) [VL] Enable arrays_zip function. --- .../apache/gluten/utils/CHExpressionUtil.scala | 1 + .../ScalarFunctionsValidateSuite.scala | 18 ++++++++++++++++++ cpp/velox/substrait/SubstraitParser.cc | 3 ++- docs/velox-backend-support-progress.md | 2 +- .../gluten/expression/ExpressionMappings.scala | 1 + .../gluten/expression/ExpressionNames.scala | 1 + 6 files changed, 24 insertions(+), 2 deletions(-) diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala index 540af65acac5..bbe65034a0a3 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/utils/CHExpressionUtil.scala @@ -175,6 +175,7 @@ object CHExpressionUtil { ARRAY_EXCEPT -> DefaultValidator(), ARRAY_REPEAT -> DefaultValidator(), ARRAY_REMOVE -> DefaultValidator(), + ARRAYS_ZIP -> DefaultValidator(), DATE_FROM_UNIX_DATE -> DefaultValidator(), UNIX_DATE -> DefaultValidator(), MONOTONICALLY_INCREASING_ID -> DefaultValidator(), diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index d4ef3941205f..6fb45141188f 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -820,6 +820,24 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest { } } + test("arrays_zip") { + withTempPath { + path => + Seq[(Seq[Integer], Seq[Integer])]( + (Seq(1, 2, 3), Seq(3, 4)), + (Seq(5, null), Seq(null, 1, 2))) + .toDF("v1", "v2") + .write + .parquet(path.getCanonicalPath) + + spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("array_tbl") + + runQueryAndCompare("select arrays_zip(v1, v2) from array_tbl;") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + } + } + test("negative") { runQueryAndCompare("select negative(l_orderkey) from lineitem") { checkGlutenOperatorMatch[ProjectExecTransformer] diff --git a/cpp/velox/substrait/SubstraitParser.cc b/cpp/velox/substrait/SubstraitParser.cc index 9050c2ec8cd8..30fe1d7b3a64 100644 --- a/cpp/velox/substrait/SubstraitParser.cc +++ b/cpp/velox/substrait/SubstraitParser.cc @@ -406,7 +406,8 @@ std::unordered_map SubstraitParser::substraitVeloxFunc {"try_add", "plus"}, {"forall", "all_match"}, {"exists", "any_match"}, - {"negative", "unaryminus"}}; + {"negative", "unaryminus"}, + {"arrays_zip", "zip"}}; const std::unordered_map SubstraitParser::typeMap_ = { {"bool", "BOOLEAN"}, diff --git a/docs/velox-backend-support-progress.md b/docs/velox-backend-support-progress.md index dcd73196158c..90f4fda9ea4b 100644 --- a/docs/velox-backend-support-progress.md +++ b/docs/velox-backend-support-progress.md @@ -273,7 +273,7 @@ Gluten supports 199 functions. (Drag to right to see all data types) | array_sort | array_sort | array_sort | S | | | | | | | | | | | | | | | | | | | | | array_union | | | | | | | | | | | | | | | | | | | | | | | | arrays_overlap | array_overlap | | | | | | | | | | | | | | | | | | | | | | -| arrays_zip | | | | | | | | | | | | | | | | | | | | | | | +| arrays_zip | zip | | S | | | | | | | | | | | | | | | | | | | | | cardinality | cardinality | | | | | | | | | | | | | | | | | | | | | | | element_at | element_at | element_at | S | | | | | | | | | | | | | | | | S | S | | | | exists | any_match | | S | | | | | | | | | | | | | | | | | | | | diff --git a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala index 7b4d6cd159ea..19a77e515eec 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionMappings.scala @@ -234,6 +234,7 @@ object ExpressionMappings { Sig[ArrayExcept](ARRAY_EXCEPT), Sig[ArrayRepeat](ARRAY_REPEAT), Sig[ArrayRemove](ARRAY_REMOVE), + Sig[ArraysZip](ARRAYS_ZIP), Sig[ArrayFilter](FILTER), Sig[ArrayForAll](FORALL), Sig[ArrayExists](EXISTS), diff --git a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala index 7d516ecd006d..1e0d86a66c4f 100644 --- a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala +++ b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala @@ -250,6 +250,7 @@ object ExpressionNames { final val ARRAY_EXCEPT = "array_except" final val ARRAY_REPEAT = "array_repeat" final val ARRAY_REMOVE = "array_remove" + final val ARRAYS_ZIP = "arrays_zip" final val FILTER = "filter" final val FORALL = "forall" final val EXISTS = "exists"