Skip to content

Commit

Permalink
[VL] Explode support Literal array and map (#4019)
Browse files Browse the repository at this point in the history
  • Loading branch information
liujiayi771 authored Dec 16, 2023
1 parent dee511d commit 06a7a6a
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ import io.glutenproject.substrait.plan.PlanNode
import io.glutenproject.validate.NativePlanValidationInfo
import io.glutenproject.vectorized.NativePlanEvaluator

import org.apache.spark.sql.catalyst.expressions.{CreateMap, Explode, Expression, Generator, JsonTuple, Literal, PosExplode}
import org.apache.spark.sql.catalyst.expressions.{CreateMap, Explode, Expression, Generator, JsonTuple, PosExplode}
import org.apache.spark.sql.catalyst.plans.physical.Partitioning
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.types.{ArrayType, BinaryType, BooleanType, ByteType, DataType, DateType, DecimalType, DoubleType, FloatType, IntegerType, LongType, MapType, ShortType, StringType, StructType, TimestampType}
import org.apache.spark.sql.types._

class ValidatorApiImpl extends ValidatorApi {

Expand Down Expand Up @@ -95,20 +95,15 @@ class ValidatorApiImpl extends ValidatorApi {
return ValidationResult.notOk(s"Velox backend does not support outer")
}
generator match {
case generator: JsonTuple =>
case _: JsonTuple =>
ValidationResult.notOk(s"Velox backend does not support this json_tuple")
case generator: PosExplode =>
case _: PosExplode =>
// TODO(yuan): support posexplode and remove this check
ValidationResult.notOk(s"Velox backend does not support this posexplode")
case explode: Explode if (explode.child.isInstanceOf[CreateMap]) =>
// explode(MAP(col1, col2))
ValidationResult.notOk(s"Velox backend does not support MAP datatype")
case explode: Explode if (explode.child.isInstanceOf[Literal]) =>
// explode(ARRAY(1, 2, 3))
ValidationResult.notOk(s"Velox backend does not support literal Array datatype")
case explode: Explode =>
explode.child.dataType match {
case _: MapType =>
explode.child match {
case _: CreateMap =>
// explode(MAP(col1, col2))
ValidationResult.notOk(s"Velox backend does not support MAP datatype")
case _ =>
ValidationResult.ok
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -694,4 +694,28 @@ class TestOperator extends VeloxWholeStageTransformerSuite with AdaptiveSparkPla
}
}
}

test("test explode function") {
runQueryAndCompare("""
|SELECT explode(array(1, 2, 3));
|""".stripMargin) {
checkOperatorMatch[GenerateExecTransformer]
}
runQueryAndCompare("""
|SELECT explode(map(1, 'a', 2, 'b'));
|""".stripMargin) {
checkOperatorMatch[GenerateExecTransformer]
}
runQueryAndCompare(
"""
|SELECT explode(array(map(1, 'a', 2, 'b'), map(3, 'c', 4, 'd'), map(5, 'e', 6, 'f')));
|""".stripMargin) {
checkOperatorMatch[GenerateExecTransformer]
}
runQueryAndCompare("""
|SELECT explode(map(1, array(1, 2), 2, array(3, 4)));
|""".stripMargin) {
checkOperatorMatch[GenerateExecTransformer]
}
}
}
17 changes: 15 additions & 2 deletions cpp/velox/substrait/SubstraitToVeloxPlan.cc
Original file line number Diff line number Diff line change
Expand Up @@ -498,8 +498,6 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::

std::vector<core::FieldAccessTypedExprPtr> replicated;
std::vector<core::FieldAccessTypedExprPtr> unnest;
// TODO(yuan): get from generator output
std::vector<std::string> unnestNames = {"C0"};

const auto& generator = generateRel.generator();
const auto& requiredChildOutput = generateRel.child_output();
Expand Down Expand Up @@ -534,6 +532,21 @@ core::PlanNodePtr SubstraitToVeloxPlanConverter::toVeloxPlan(const ::substrait::
unnest.emplace_back(unnestFieldExpr);
}

// TODO(yuan): get from generator output
std::vector<std::string> unnestNames;
int unnestIndex = 0;
for (const auto& variable : unnest) {
if (variable->type()->isArray()) {
unnestNames.emplace_back(fmt::format("C{}", unnestIndex++));
} else if (variable->type()->isMap()) {
unnestNames.emplace_back(fmt::format("C{}", unnestIndex++));
unnestNames.emplace_back(fmt::format("C{}", unnestIndex++));
} else {
VELOX_FAIL(
"Unexpected type of unnest variable. Expected ARRAY or MAP, but got {}.", variable->type()->toString());
}
}

auto node = std::make_shared<core::UnnestNode>(
nextPlanNodeId(), replicated, unnest, std::move(unnestNames), std::nullopt, childNode);

Expand Down

0 comments on commit 06a7a6a

Please sign in to comment.