From 4209b747daccc55662624b6b28e22164efc2f884 Mon Sep 17 00:00:00 2001 From: zhli1142015 Date: Wed, 10 Jul 2024 11:23:53 +0800 Subject: [PATCH] [VL] Enable levenshtein function --- .../execution/ScalarFunctionsValidateSuite.scala | 12 ++++++++++++ .../CommonScalarFunctionParser.cpp | 2 +- docs/velox-backend-support-progress.md | 2 +- .../apache/gluten/expression/ExpressionNames.scala | 2 +- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index 9d0a926e3b0f..c8da6f79c835 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -1189,4 +1189,16 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest { } } } + + test("levenshtein") { + runQueryAndCompare("select levenshtein(c_comment, c_address) from customer limit 50") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + } + + testWithSpecifiedSparkVersion("levenshtein with limit", Some("3.5")) { + runQueryAndCompare("select levenshtein(c_comment, c_address, 3) from customer limit 50") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + } } diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp index ca90b0bdbf08..e4855b507f90 100644 --- a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp +++ b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp @@ -129,7 +129,7 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Translate, translate, translateUTF8); REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Initcap, initcap, initcapUTF8); REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Conv, conv, sparkConv); REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Uuid, uuid, generateUUIDv4); -REGISTER_COMMON_SCALAR_FUNCTION_PARSER(LevenshteinDistance, levenshteinDistance, editDistanceUTF8); +REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Levenshtein, levenshtein, editDistanceUTF8); REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Crc32, crc32, CRC32); REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Murmur3Hash, murmur3hash, sparkMurmurHash3_32); diff --git a/docs/velox-backend-support-progress.md b/docs/velox-backend-support-progress.md index f3f1e100b2b2..5a2d53376396 100644 --- a/docs/velox-backend-support-progress.md +++ b/docs/velox-backend-support-progress.md @@ -161,7 +161,7 @@ Gluten supports 199 functions. (Drag to right to see all data types) | lcase, lower | lower | lower | S | | | | | | | | | | | S | | | | | | | | | | left | | | S | | | | | | | | | | | S | | | | | | | | | | length | length | length | S | | | | | | | | | | | S | | | | | | | | | -| levenshtein | | | | | | | | | | | | | | | | | | | | | | | +| levenshtein | | levenshtein | S | | | | | | | | | | | | | | | | | | | | | locate | strpos | | S | Mismatched | | | | | | | | | | S | | | | | | | | | | lower | lower | lower | S | | | | | | | | | | | S | | | | | | | | | | lpad | lpad | | S | | | | | | | | | | | S | | | | | | | | | diff --git a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala index e3dc3a8ab0a9..0b31ec346705 100644 --- a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala +++ b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala @@ -128,7 +128,7 @@ object ExpressionNames { final val UUID = "uuid" final val BIT_LENGTH = "bit_length" final val OCTET_LENGTH = "octet_length" - final val LEVENSHTEIN = "levenshteinDistance" + final val LEVENSHTEIN = "levenshtein" final val UNBASE64 = "unbase64" final val BASE64 = "base64"