From 33d38a333e3ee3395b3bfe5ff132b69fabd0a631 Mon Sep 17 00:00:00 2001 From: gaoyangxiaozhu Date: Fri, 28 Jun 2024 19:32:33 +0800 Subject: [PATCH 1/6] mask function support --- .../ScalarFunctionsValidateSuite.scala | 18 ++++++++++++++++++ ep/build-velox/src/get_velox.sh | 4 ++-- .../gluten/expression/ExpressionNames.scala | 1 + .../sql/shims/spark35/Spark35Shims.scala | 3 ++- 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index bd32a799c3ac..59833cc7ba3f 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -815,6 +815,24 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest { } } + testWithSpecifiedSparkVersion("mask", Some("3.5")) { + runQueryAndCompare("SELECT mask(c_comment) FROM customer limit 50") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + runQueryAndCompare("SELECT mask(c_comment, 'Y') FROM customer limit 50") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + runQueryAndCompare("SELECT mask(c_comment, 'Y', 'y') FROM customer limit 50") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + runQueryAndCompare("SELECT mask(c_comment, 'Y', 'y', 'o') FROM customer limit 50") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + runQueryAndCompare("SELECT mask(c_comment, 'Y', 'y', 'o', '*') FROM customer limit 50") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + } + test("bit_length") { runQueryAndCompare( "select bit_length(c_comment), bit_length(cast(c_comment as binary))" + diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index 0adc1ce8ff61..d10e469e4d05 100755 --- a/ep/build-velox/src/get_velox.sh +++ 
b/ep/build-velox/src/get_velox.sh @@ -16,8 +16,8 @@ set -exu -VELOX_REPO=https://github.com/oap-project/velox.git -VELOX_BRANCH=2024_06_28 +VELOX_REPO=https://github.com/gaoyangxiaozhu/velox.git +VELOX_BRANCH=gayangya/mask_oap VELOX_HOME="" #Set on run gluten on HDFS diff --git a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala index 87b1b4e7539b..e791684ed442 100644 --- a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala +++ b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala @@ -130,6 +130,7 @@ object ExpressionNames { final val LEVENSHTEIN = "levenshteinDistance" final val UNBASE64 = "unbase64" final val BASE64 = "base64" + final val MASK = "mask" // URL functions final val PARSE_URL = "parse_url" diff --git a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala index 8ac8d323efd6..8b306442a65c 100644 --- a/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala +++ b/shims/spark35/src/main/scala/org/apache/gluten/sql/shims/spark35/Spark35Shims.scala @@ -77,7 +77,8 @@ class Spark35Shims extends SparkShims { Sig[Sec](ExpressionNames.SEC), Sig[Csc](ExpressionNames.CSC), Sig[KnownNullable](ExpressionNames.KNOWN_NULLABLE), - Sig[Empty2Null](ExpressionNames.EMPTY2NULL) + Sig[Empty2Null](ExpressionNames.EMPTY2NULL), + Sig[Mask](ExpressionNames.MASK) ) } From 29d9bd4dd5d7c786805e516e830259db21e7e016 Mon Sep 17 00:00:00 2001 From: gaoyangxiaozhu Date: Mon, 1 Jul 2024 14:57:20 +0800 Subject: [PATCH 2/6] trigger CI From e944b570780aa1b6121f3f2dd94cdb97873b2300 Mon Sep 17 00:00:00 2001 From: gaoyangxiaozhu Date: Mon, 1 Jul 2024 15:33:39 +0800 Subject: [PATCH 3/6] trigger ci From cfa34b521c875313aeb4a2f392b19743933aab41 Mon Sep 17 00:00:00 2001 From: gaoyangxiaozhu
Date: Mon, 1 Jul 2024 15:41:27 +0800 Subject: [PATCH 4/6] trigger ci From 8fc3ece6429af3ff52dad41e69f723e6850f7f4c Mon Sep 17 00:00:00 2001 From: Rong Ma Date: Mon, 1 Jul 2024 20:53:27 +0800 Subject: [PATCH 5/6] [CI] Fix centos7 CI build error (#6298) --- .github/workflows/velox_docker.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/velox_docker.yml b/.github/workflows/velox_docker.yml index 098b2a2d57eb..ded2032f4241 100644 --- a/.github/workflows/velox_docker.yml +++ b/.github/workflows/velox_docker.yml @@ -192,10 +192,11 @@ jobs: name: velox-arrow-jar-centos-7-${{github.sha}} path: /root/.m2/repository/org/apache/arrow/ - name: Update mirror list - if: matrix.os == 'centos:8' run: | - sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true - sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true + if [ "${{ matrix.os }}" = "centos:7" ] || [ "${{ matrix.os }}" = "centos:8" ]; then + sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* || true + sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* || true + fi - name: Setup java and maven run: | if [ "${{ matrix.java }}" = "java-17" ]; then From 5b4ffa5c4e5fe1cea2996115452d789801347aba Mon Sep 17 00:00:00 2001 From: gaoyangxiaozhu Date: Tue, 30 Jul 2024 11:52:38 +0800 Subject: [PATCH 6/6] small change --- .../apache/gluten/execution/ScalarFunctionsValidateSuite.scala | 2 +- .../org/apache/gluten/sql/shims/spark34/Spark34Shims.scala | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index 0b34c397e189..13ade14b5943 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala 
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -918,7 +918,7 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateTest { } } - testWithSpecifiedSparkVersion("mask", Some("3.5")) { + testWithSpecifiedSparkVersion("mask", Some("3.4")) { runQueryAndCompare("SELECT mask(c_comment) FROM customer limit 50") { checkGlutenOperatorMatch[ProjectExecTransformer] } diff --git a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala index 203256cf5fec..cd7e4347d6b5 100644 --- a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala +++ b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala @@ -80,7 +80,8 @@ class Spark34Shims extends SparkShims { Sig[Empty2Null](ExpressionNames.EMPTY2NULL), Sig[TimestampAdd](ExpressionNames.TIMESTAMP_ADD), Sig[RoundFloor](ExpressionNames.FLOOR), - Sig[RoundCeil](ExpressionNames.CEIL) + Sig[RoundCeil](ExpressionNames.CEIL), + Sig[Mask](ExpressionNames.MASK) ) }