Skip to content

Commit

Permalink
Remove special handling for split
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo committed Aug 14, 2024
1 parent 8195284 commit 1cb80d8
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 68 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -661,17 +661,6 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
* * Expressions.
*/

/** Generate StringSplit transformer. */
override def genStringSplitTransformer(
substraitExprName: String,
srcExpr: ExpressionTransformer,
regexExpr: ExpressionTransformer,
limitExpr: ExpressionTransformer,
original: StringSplit): ExpressionTransformer = {
// In velox, split function just support tow args, not support limit arg for now
VeloxStringSplitTransformer(substraitExprName, srcExpr, regexExpr, limitExpr, original)
}

/**
* Generate Alias transformer.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,33 +107,3 @@ case class VeloxHashExpressionTransformer(
ExpressionBuilder.makeScalarFunction(functionId, nodes, typeNode)
}
}

case class VeloxStringSplitTransformer(
substraitExprName: String,
srcExpr: ExpressionTransformer,
regexExpr: ExpressionTransformer,
limitExpr: ExpressionTransformer,
original: StringSplit)
extends ExpressionTransformer {
// TODO: split function support limit arg
override def children: Seq[ExpressionTransformer] = srcExpr :: regexExpr :: Nil

override def doTransform(args: java.lang.Object): ExpressionNode = {
if (
!regexExpr.isInstanceOf[LiteralTransformer] ||
!limitExpr.isInstanceOf[LiteralTransformer]
) {
throw new GlutenNotSupportException(
"Gluten only supports literal input as limit/regex for split function.")
}

val limit = limitExpr.doTransform(args).asInstanceOf[IntLiteralNode].getValue
val regex = regexExpr.doTransform(args).asInstanceOf[StringLiteralNode].getValue
if (limit > 0 || regex.length > 1) {
throw new GlutenNotSupportException(
s"$original supported single-length regex and negative limit, but given $limit and $regex")
}

super.doTransform(args)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -519,23 +519,56 @@ class VeloxStringFunctionsSuite extends VeloxWholeStageTransformerSuite {
s"from $LINEITEM_TABLE limit 5") { _ => }
}

ignore("split") {
testWithSpecifiedSparkVersion("split", Some("3.4")) {
runQueryAndCompare(
s"select l_orderkey, l_comment, split(l_comment, ' ', 3) " +
s"from $LINEITEM_TABLE limit 5") { _ => }
s"select l_orderkey, l_comment, split(l_comment, '') " +
s"from $LINEITEM_TABLE limit 5") {
checkGlutenOperatorMatch[ProjectExecTransformer]
}
runQueryAndCompare(
s"select l_orderkey, l_comment, split(l_comment, '', 1) " +
s"from $LINEITEM_TABLE limit 5") {
checkGlutenOperatorMatch[ProjectExecTransformer]
}

// todo incorrect results
runQueryAndCompare(
s"select l_orderkey, l_comment, split(l_comment, '[a]', 3) " +
s"from $LINEITEM_TABLE limit 5") { _ => }
s"select l_orderkey, l_comment, split(l_comment, ',') " +
s"from $LINEITEM_TABLE limit 5") {
checkGlutenOperatorMatch[ProjectExecTransformer]
}
runQueryAndCompare(
s"select l_orderkey, l_comment, split(l_comment, ',', 10) " +
s"from $LINEITEM_TABLE limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer])

runQueryAndCompare(
s"select l_orderkey, split(l_comment, ' ') " +
s"from $LINEITEM_TABLE limit 5") { _ => }
s"from $LINEITEM_TABLE limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer])
runQueryAndCompare(
s"select l_orderkey, l_comment, split(l_comment, ' ', 3) " +
s"from $LINEITEM_TABLE limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer])

runQueryAndCompare(
s"select l_orderkey, split(l_comment, 'h') " +
s"from $LINEITEM_TABLE limit 5") { _ => }
s"select l_orderkey, l_comment, split(l_comment, '[a-z]+') " +
s"from $LINEITEM_TABLE limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer])
runQueryAndCompare(
s"select l_orderkey, l_comment, split(l_comment, '[a-z]+', 3) " +
s"from $LINEITEM_TABLE limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer])

runQueryAndCompare(
s"select l_orderkey, split(l_comment, '[1-9]+', -2) " +
s"from $LINEITEM_TABLE limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer])
runQueryAndCompare(
s"select l_orderkey, split(l_comment, '[1-9]+', 0) " +
s"from $LINEITEM_TABLE limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer])

runQueryAndCompare(
s"select l_orderkey, l_comment, split(l_comment, 'h') " +
s"from $LINEITEM_TABLE limit 5") {
checkGlutenOperatorMatch[ProjectExecTransformer]
}
runQueryAndCompare(
s"select l_orderkey, l_comment, split(l_comment, '[a]', 3) " +
s"from $LINEITEM_TABLE limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer])
}

test("substr") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -164,16 +164,6 @@ trait SparkPlanExecApi {
original: Expression): ExpressionTransformer =
AliasTransformer(substraitExprName, child, original)

/** Generate SplitTransformer. */
def genStringSplitTransformer(
substraitExprName: String,
srcExpr: ExpressionTransformer,
regexExpr: ExpressionTransformer,
limitExpr: ExpressionTransformer,
original: StringSplit): ExpressionTransformer = {
GenericExpressionTransformer(substraitExprName, Seq(srcExpr, regexExpr, limitExpr), original)
}

/** Generate an expression transformer to transform GetMapValue to Substrait. */
def genGetMapValueTransformer(
substraitExprName: String,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -377,14 +377,6 @@ object ExpressionConverter extends SQLConfHelper with Logging {
replaceWithExpressionTransformerInternal(t.replaceExpr, attributeSeq, expressionsMap),
t
)
case s: StringSplit =>
BackendsApiManager.getSparkPlanExecApiInstance.genStringSplitTransformer(
substraitExprName,
replaceWithExpressionTransformerInternal(s.str, attributeSeq, expressionsMap),
replaceWithExpressionTransformerInternal(s.regex, attributeSeq, expressionsMap),
replaceWithExpressionTransformerInternal(s.limit, attributeSeq, expressionsMap),
s
)
case r: RegExpReplace =>
BackendsApiManager.getSparkPlanExecApiInstance.genRegexpReplaceTransformer(
substraitExprName,
Expand Down

0 comments on commit 1cb80d8

Please sign in to comment.