Skip to content

Commit

Permalink
Fix split function
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo committed Feb 23, 2024
1 parent 6febf8a commit f863d8c
Show file tree
Hide file tree
Showing 6 changed files with 4 additions and 64 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ import org.apache.spark.sql.catalyst.{AggregateFunctionRewriteRule, FlushableHas
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry.FunctionBuilder
import org.apache.spark.sql.catalyst.catalog.BucketSpec
import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, Cast, CreateNamedStruct, ElementAt, Expression, ExpressionInfo, GetArrayItem, GetMapValue, GetStructField, If, IsNaN, Literal, Murmur3Hash, NamedExpression, NaNvl, Round, StringSplit, StringTrim}
import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, Cast, CreateNamedStruct, ElementAt, Expression, ExpressionInfo, GetArrayItem, GetMapValue, GetStructField, If, IsNaN, Literal, Murmur3Hash, NamedExpression, NaNvl, Round, StringTrim}
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, HLLAdapter}
import org.apache.spark.sql.catalyst.optimizer.BuildSide
import org.apache.spark.sql.catalyst.plans.JoinType
Expand Down Expand Up @@ -406,17 +406,6 @@ class SparkPlanExecApiImpl extends SparkPlanExecApi {
DecimalRoundTransformer(substraitExprName, child, original)
}

/**
 * Generate StringSplit transformer.
 *
 * Forwards the expression name, the three child transformers (source string, regex, limit) and
 * the original StringSplit expression unchanged to VeloxStringSplitTransformer.
 */
override def genStringSplitTransformer(
substraitExprName: String,
srcExpr: ExpressionTransformer,
regexExpr: ExpressionTransformer,
limitExpr: ExpressionTransformer,
original: StringSplit): ExpressionTransformer = {
// In Velox, the split function supports only two arguments for now; the limit argument is not
// supported yet.
VeloxStringSplitTransformer(substraitExprName, srcExpr, regexExpr, limitExpr, original)
}

/**
* Generate Alias transformer.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,33 +113,3 @@ case class VeloxHashExpressionTransformer(
ExpressionBuilder.makeScalarFunction(functionId, nodes, typeNode)
}
}

/**
 * Transformer for Spark's StringSplit expression that emits a two-argument split call
 * (source string and regex); the limit argument is validated here and then dropped.
 *
 * @param substraitExprName
 *   name of the function to emit
 * @param srcExpr
 *   transformer of the string being split
 * @param regexExpr
 *   transformer of the split pattern; must be a string literal of length at most one
 * @param limitExpr
 *   transformer of the split limit; must be an integer literal that is not positive
 * @param original
 *   the Spark expression being transformed
 */
case class VeloxStringSplitTransformer(
    substraitExprName: String,
    srcExpr: ExpressionTransformer,
    regexExpr: ExpressionTransformer,
    limitExpr: ExpressionTransformer,
    original: StringSplit)
  extends ExpressionTransformer {

  /**
   * Validates the regex and limit literals, then transforms the expression as a generic
   * two-argument function over the source and regex children.
   *
   * @throws UnsupportedOperationException
   *   if regex or limit is not a literal of the expected kind, if the limit is positive, or if
   *   the regex is longer than one character
   */
  override def doTransform(args: java.lang.Object): ExpressionNode = {
    val bothLiteral = (regexExpr, limitExpr) match {
      case (_: LiteralTransformer, _: LiteralTransformer) => true
      case _ => false
    }
    if (!bothLiteral) {
      throw new UnsupportedOperationException(
        "Gluten only supports literal input as limit/regex for split function.")
    }

    // Match on the node type instead of casting, so that a literal of an unexpected kind is
    // reported as unsupported rather than surfacing as a ClassCastException.
    val limit = limitExpr.doTransform(args) match {
      case node: IntLiteralNode => node.getValue
      case other =>
        throw new UnsupportedOperationException(
          s"$original requires an integer literal as limit for split function, but got $other")
    }
    val regex = regexExpr.doTransform(args) match {
      case node: StringLiteralNode => node.getValue
      case other =>
        throw new UnsupportedOperationException(
          s"$original requires a string literal as regex for split function, but got $other")
    }

    // A non-positive limit means "split as many times as possible" in Spark, so the limit
    // argument can be dropped in that case only.
    // NOTE(review): a one-character regex may still be a metacharacter (e.g. "." or "|");
    // confirm the native split treats such a delimiter the same way Spark does.
    if (limit > 0 || regex.length > 1) {
      throw new UnsupportedOperationException(
        s"$original only supports a single-character regex and a non-positive limit, " +
          s"but got limit $limit and regex $regex")
    }

    // TODO: split function support limit arg
    GenericExpressionTransformer(substraitExprName, Seq(srcExpr, regexExpr), original)
      .doTransform(args)
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -523,12 +523,11 @@ class VeloxStringFunctionsSuite extends VeloxWholeStageTransformerSuite {
s"from $LINEITEM_TABLE limit 5") { _ => }
}

ignore("split") {
test("split") {
runQueryAndCompare(
s"select l_orderkey, l_comment, split(l_comment, ' ', 3) " +
s"from $LINEITEM_TABLE limit 5") { _ => }

// todo incorrect results
runQueryAndCompare(
s"select l_orderkey, l_comment, split(l_comment, '[a]', 3) " +
s"from $LINEITEM_TABLE limit 5") { _ => }
Expand Down
4 changes: 2 additions & 2 deletions ep/build-velox/src/get_velox.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

set -exu

VELOX_REPO=https://github.com/oap-project/velox.git
VELOX_BRANCH=2024_02_22
VELOX_REPO=https://github.com/rui-mo/velox.git
VELOX_BRANCH=test_split
VELOX_HOME=""

#Set on run gluten on HDFS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,16 +143,6 @@ trait SparkPlanExecApi {
original: Expression): ExpressionTransformer =
AliasTransformer(substraitExprName, child, original)

/**
 * Generate the transformer for a StringSplit expression.
 *
 * This default implementation wraps the source, regex and limit child transformers, in that
 * order, in a GenericExpressionTransformer.
 */
def genStringSplitTransformer(
substraitExprName: String,
srcExpr: ExpressionTransformer,
regexExpr: ExpressionTransformer,
limitExpr: ExpressionTransformer,
original: StringSplit): ExpressionTransformer = {
GenericExpressionTransformer(substraitExprName, Seq(srcExpr, regexExpr, limitExpr), original)
}

def genRandTransformer(
substraitExprName: String,
explicitSeed: ExpressionTransformer,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -353,14 +353,6 @@ object ExpressionConverter extends SQLConfHelper with Logging {
replaceWithExpressionTransformerInternal(l.third, attributeSeq, expressionsMap),
l
)
case s: StringSplit =>
BackendsApiManager.getSparkPlanExecApiInstance.genStringSplitTransformer(
substraitExprName,
replaceWithExpressionTransformerInternal(s.str, attributeSeq, expressionsMap),
replaceWithExpressionTransformerInternal(s.regex, attributeSeq, expressionsMap),
replaceWithExpressionTransformerInternal(s.limit, attributeSeq, expressionsMap),
s
)
case r: RegExpReplace =>
RegExpReplaceTransformer(
substraitExprName,
Expand Down

0 comments on commit f863d8c

Please sign in to comment.