diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala index 89d29781c079..fd0fc62dcbb6 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala @@ -661,17 +661,6 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi { * * Expressions. */ - /** Generate StringSplit transformer. */ - override def genStringSplitTransformer( - substraitExprName: String, - srcExpr: ExpressionTransformer, - regexExpr: ExpressionTransformer, - limitExpr: ExpressionTransformer, - original: StringSplit): ExpressionTransformer = { - // In velox, split function just support tow args, not support limit arg for now - VeloxStringSplitTransformer(substraitExprName, srcExpr, regexExpr, limitExpr, original) - } - /** * Generate Alias transformer. * diff --git a/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala b/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala index 51b19ab140d9..71e58f124fc8 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/expression/ExpressionTransformer.scala @@ -107,33 +107,3 @@ case class VeloxHashExpressionTransformer( ExpressionBuilder.makeScalarFunction(functionId, nodes, typeNode) } } - -case class VeloxStringSplitTransformer( - substraitExprName: String, - srcExpr: ExpressionTransformer, - regexExpr: ExpressionTransformer, - limitExpr: ExpressionTransformer, - original: StringSplit) - extends ExpressionTransformer { - // TODO: split function support limit arg - override def children: Seq[ExpressionTransformer] = srcExpr :: regexExpr :: Nil - - override def doTransform(args: java.lang.Object): ExpressionNode = { - if ( - !regexExpr.isInstanceOf[LiteralTransformer] || - !limitExpr.isInstanceOf[LiteralTransformer] - ) { - throw new GlutenNotSupportException( - "Gluten only supports literal input as limit/regex for split function.") - } - - val limit = limitExpr.doTransform(args).asInstanceOf[IntLiteralNode].getValue - val regex = regexExpr.doTransform(args).asInstanceOf[StringLiteralNode].getValue - if (limit > 0 || regex.length > 1) { - throw new GlutenNotSupportException( - s"$original supported single-length regex and negative limit, but given $limit and $regex") - } - - super.doTransform(args) - } -} diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxStringFunctionsSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxStringFunctionsSuite.scala index 2aedde12a5a9..9357bc754d5d 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxStringFunctionsSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxStringFunctionsSuite.scala @@ -519,23 +519,56 @@ class VeloxStringFunctionsSuite extends VeloxWholeStageTransformerSuite { s"from $LINEITEM_TABLE limit 5") { _ => } } - ignore("split") { + testWithSpecifiedSparkVersion("split", Some("3.4")) { runQueryAndCompare( - s"select l_orderkey, l_comment, split(l_comment, ' ', 3) " + - s"from $LINEITEM_TABLE limit 5") { _ => } + s"select l_orderkey, l_comment, split(l_comment, '') " + + s"from $LINEITEM_TABLE limit 5") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + runQueryAndCompare( + s"select l_orderkey, l_comment, split(l_comment, '', 1) " + + s"from $LINEITEM_TABLE limit 5") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } - // todo incorrect results runQueryAndCompare( - s"select l_orderkey, l_comment, split(l_comment, '[a]', 3) " + - s"from $LINEITEM_TABLE limit 5") { _ => } + s"select l_orderkey, l_comment, split(l_comment, ',') " + + s"from $LINEITEM_TABLE limit 5") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + runQueryAndCompare( + s"select l_orderkey, l_comment, split(l_comment, ',', 10) " + + s"from $LINEITEM_TABLE limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer]) runQueryAndCompare( s"select l_orderkey, split(l_comment, ' ') " + - s"from $LINEITEM_TABLE limit 5") { _ => } + s"from $LINEITEM_TABLE limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer]) + runQueryAndCompare( + s"select l_orderkey, l_comment, split(l_comment, ' ', 3) " + + s"from $LINEITEM_TABLE limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer]) runQueryAndCompare( - s"select l_orderkey, split(l_comment, 'h') " + - s"from $LINEITEM_TABLE limit 5") { _ => } + s"select l_orderkey, l_comment, split(l_comment, '[a-z]+') " + + s"from $LINEITEM_TABLE limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer]) + runQueryAndCompare( + s"select l_orderkey, l_comment, split(l_comment, '[a-z]+', 3) " + + s"from $LINEITEM_TABLE limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer]) + + runQueryAndCompare( + s"select l_orderkey, split(l_comment, '[1-9]+', -2) " + + s"from $LINEITEM_TABLE limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer]) + runQueryAndCompare( + s"select l_orderkey, split(l_comment, '[1-9]+', 0) " + + s"from $LINEITEM_TABLE limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer]) + + runQueryAndCompare( + s"select l_orderkey, l_comment, split(l_comment, 'h') " + + s"from $LINEITEM_TABLE limit 5") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + runQueryAndCompare( + s"select l_orderkey, l_comment, split(l_comment, '[a]', 3) " + + s"from $LINEITEM_TABLE limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer]) } test("substr") { diff --git a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala index 69392a353d7f..3b9e87a2055a 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/backendsapi/SparkPlanExecApi.scala @@ -164,16 +164,6 @@ trait SparkPlanExecApi { original: Expression): ExpressionTransformer = AliasTransformer(substraitExprName, child, original) - /** Generate SplitTransformer. */ - def genStringSplitTransformer( - substraitExprName: String, - srcExpr: ExpressionTransformer, - regexExpr: ExpressionTransformer, - limitExpr: ExpressionTransformer, - original: StringSplit): ExpressionTransformer = { - GenericExpressionTransformer(substraitExprName, Seq(srcExpr, regexExpr, limitExpr), original) - } - /** Generate an expression transformer to transform GetMapValue to Substrait. */ def genGetMapValueTransformer( substraitExprName: String, diff --git a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala index 3ca66b51897b..8bca5dbf8605 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala @@ -377,14 +377,6 @@ object ExpressionConverter extends SQLConfHelper with Logging { replaceWithExpressionTransformerInternal(t.replaceExpr, attributeSeq, expressionsMap), t ) - case s: StringSplit => - BackendsApiManager.getSparkPlanExecApiInstance.genStringSplitTransformer( - substraitExprName, - replaceWithExpressionTransformerInternal(s.str, attributeSeq, expressionsMap), - replaceWithExpressionTransformerInternal(s.regex, attributeSeq, expressionsMap), - replaceWithExpressionTransformerInternal(s.limit, attributeSeq, expressionsMap), - s - ) case r: RegExpReplace => BackendsApiManager.getSparkPlanExecApiInstance.genRegexpReplaceTransformer( substraitExprName,