Skip to content

Commit

Permalink
[YQL-18043] Support MAX_OF/MIN_OF for all comparable types (#3720)
Browse files Browse the repository at this point in the history
  • Loading branch information
nepal authored Apr 15, 2024
1 parent 9eb8fa0 commit 6b2d235
Show file tree
Hide file tree
Showing 9 changed files with 235 additions and 45 deletions.
10 changes: 9 additions & 1 deletion ydb/library/yql/core/common_opt/yql_co_simple2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,14 @@ TExprNode::TPtr OptimizeAnd(const TExprNode::TPtr& node, TExprContext& ctx) {
return ctx.ChangeChildren(*node, std::move(newChildren));
}

// Optimizer callback registered for the "Min" and "Max" callables.
//
// When the node's annotated type is optional or null the node is returned
// unchanged; otherwise the node is handed to OptimizeDups and its result is
// returned.
// NOTE(review): the reason nullable results bypass OptimizeDups is not visible
// in this function — confirm against the nullable Min/Max expansion before
// relaxing the check.
TExprNode::TPtr OptimizeMinMax(const TExprNode::TPtr& node, TExprContext& ctx) {
    const bool nullableResult = node->GetTypeAnn()->IsOptionalOrNull();
    if (!nullableResult) {
        return OptimizeDups(node, ctx);
    }

    return node;
}

TExprNode::TPtr CheckIfWorldWithSame(const TExprNode::TPtr& node, TExprContext& ctx) {
if (node->Child(3U) == node->Child(2U)) {
YQL_CLOG(DEBUG, Core) << node->Content() << " with identical branches";
Expand Down Expand Up @@ -589,7 +597,7 @@ void RegisterCoSimpleCallables2(TCallableOptimizerMap& map) {
map["And"] = std::bind(&OptimizeAnd, _1, _2);
map["Or"] = std::bind(OptimizeDups, _1, _2);

map["Min"] = map["Max"] = std::bind(&OptimizeDups, _1, _2);
map["Min"] = map["Max"] = std::bind(&OptimizeMinMax, _1, _2);

map["AggrMin"] = map["AggrMax"] = map["Coalesce"] = std::bind(&DropAggrOverSame, _1);

Expand Down
107 changes: 107 additions & 0 deletions ydb/library/yql/core/peephole_opt/yql_opt_peephole_physical.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2966,6 +2966,111 @@ TExprNode::TPtr ExpandAggrMinMax(const TExprNode::TPtr& node, TExprContext& ctx)
.Seal().Build();
}

// Builds an expression that picks the minimum or maximum of the nodes in
// [argsBegin, argsEnd) as a tree of "If" callables.
//
//  name            - "Min" selects the smaller operand; any other value is
//                    treated as the maximum case.
//  usePlainCompare - true: compare with "<=" / ">=";
//                    false: compare with "AggrLessOrEqual" / "AggrGreaterOrEqual".
//  pos             - position assigned to the generated nodes.
//  argsBegin/End   - random-access iterators over TExprNode::TPtr; the range
//                    must not be empty (enforced with YQL_ENSURE).
//
// A single-element range yields that element as is. Longer ranges are split in
// half recursively, and the two partial results are combined as
//     If(cmp(left, right), left, right)
// so on equality the left operand is the one selected.
template<typename T>
TExprNode::TPtr DoExpandMinMax(TStringBuf name, bool usePlainCompare, TPositionHandle pos, T argsBegin, T argsEnd, TExprContext& ctx) {
    const size_t count = argsEnd - argsBegin;
    YQL_ENSURE(count > 0);
    if (count == 1) {
        return *argsBegin;
    }

    TExprNode::TPtr lhs;
    TExprNode::TPtr rhs;

    if (count == 2) {
        // Exactly two operands: they are the leaves of this comparison.
        lhs = *argsBegin;
        rhs = *(argsBegin + 1);
    } else {
        // More than two: reduce each half first, left half before right half.
        const auto middle = argsBegin + count / 2;
        lhs = DoExpandMinMax(name, usePlainCompare, pos, argsBegin, middle, ctx);
        rhs = DoExpandMinMax(name, usePlainCompare, pos, middle, argsEnd, ctx);
    }

    const bool isMin = name == "Min";
    const char* const comparator = usePlainCompare
        ? (isMin ? "<=" : ">=")
        : (isMin ? "AggrLessOrEqual" : "AggrGreaterOrEqual");

    return ctx.Builder(pos)
        .Callable("If")
            .Callable(0, comparator)
                .Add(0, lhs)
                .Add(1, rhs)
            .Seal()
            .Add(1, lhs)
            .Add(2, rhs)
        .Seal()
        .Build();
}

// Peephole expansion of "Min"/"Max" for arguments that are not plain data.
//
// The node must have at least two children. Three cases are handled:
//
//  1. The result type is data or optional of data: the node is returned
//     unchanged.
//  2. The result type is not optional/null: the node is replaced by the
//     If-tree produced by DoExpandMinMax with plain "<=" / ">=" comparisons.
//  3. The result type is optional/null:
//       a) every argument is wrapped into a list node (arg, Uint32(index));
//       b) a candidate pair is selected with DoExpandMinMax using the
//          AggrLessOrEqual / AggrGreaterOrEqual comparisons;
//       c) for every argument a check is built:
//              If(Nth(candidate, 1) == Uint32(index),
//                 true,
//                 Nth(candidate, 0) <=|>= arg)
//          i.e. the candidate is compared against every argument except the
//          one it originated from;
//       d) the result is
//              If(Coalesce(And(checks...), false), Nth(candidate, 0), Nothing(type)).
//     NOTE(review): presumably the plain comparison in (c) yields NULL when
//     the ordering cannot be decided because of nested NULLs, which makes the
//     whole result Nothing — confirm against the comparison semantics.
TExprNode::TPtr ExpandMinMax(const TExprNode::TPtr& node, TExprContext& ctx) {
    YQL_ENSURE(node->ChildrenSize() >= 2);

    const auto* resultType = node->GetTypeAnn();
    if (IsDataOrOptionalOfData(resultType)) {
        return node;
    }

    const auto& args = node->ChildrenList();
    const bool nullableResult = resultType->IsOptionalOrNull();
    if (!nullableResult) {
        YQL_CLOG(DEBUG, CorePeepHole) << "Expand " << node->Content() << " with complex non-nullable types";
        return DoExpandMinMax(node->Content(), true, node->Pos(), args.begin(), args.end(), ctx);
    }

    YQL_CLOG(DEBUG, CorePeepHole) << "Expand " << node->Content() << " with complex nullable types";

    // Pair every argument with its ordinal so the chosen candidate can later
    // be told apart from the remaining arguments.
    TExprNodeList indexedArgs;
    ui32 argIndex = 0;
    for (const auto& arg : args) {
        auto indexedArg = ctx.Builder(arg->Pos())
            .List()
                .Add(0, arg)
                .Callable(1, "Uint32")
                    .Atom(0, argIndex)
                .Seal()
            .Seal()
            .Build();
        indexedArgs.push_back(std::move(indexedArg));
        ++argIndex;
    }

    const auto candidate = DoExpandMinMax(node->Content(), false, node->Pos(), indexedArgs.begin(), indexedArgs.end(), ctx);

    // Plain comparison used to validate the candidate against each argument.
    const char* const plainCompare = node->IsCallable("Min") ? "<=" : ">=";

    TExprNodeList checks;
    argIndex = 0;
    for (const auto& arg : args) {
        auto check = ctx.Builder(arg->Pos())
            .Callable("If")
                .Callable(0, "==")
                    .Callable(0, "Nth")
                        .Add(0, candidate)
                        .Atom(1, 1)
                    .Seal()
                    .Callable(1, "Uint32")
                        .Atom(0, argIndex)
                    .Seal()
                .Seal()
                .Add(1, MakeBool<true>(arg->Pos(), ctx))
                .Callable(2, plainCompare)
                    .Callable(0, "Nth")
                        .Add(0, candidate)
                        .Atom(1, 0)
                    .Seal()
                    .Add(1, arg)
                .Seal()
            .Seal()
            .Build();
        checks.push_back(std::move(check));
        ++argIndex;
    }

    return ctx.Builder(node->Pos())
        .Callable("If")
            .Callable(0, "Coalesce")
                .Add(0, ctx.NewCallable(node->Pos(), "And", std::move(checks)))
                .Add(1, MakeBool<false>(node->Pos(), ctx))
            .Seal()
            .Callable(1, "Nth")
                .Add(0, candidate)
                .Atom(1, 0)
            .Seal()
            .Callable(2, "Nothing")
                .Add(0, ExpandType(node->Pos(), *resultType, ctx))
            .Seal()
        .Seal()
        .Build();
}

template <bool Ordered>
TExprNode::TPtr OptimizeMap(const TExprNode::TPtr& node, TExprContext& ctx) {
const auto& arg = node->Tail().Head().Head();
Expand Down Expand Up @@ -7956,6 +8061,8 @@ struct TPeepHoleRules {
{"OptionalReduce", &ExpandOptionalReduce},
{"AggrMin", &ExpandAggrMinMax<true>},
{"AggrMax", &ExpandAggrMinMax<false>},
{"Min", &ExpandMinMax},
{"Max", &ExpandMinMax},
{"And", &OptimizeLogicalDups<true>},
{"Or", &OptimizeLogicalDups<false>},
{"CombineByKey", &ExpandCombineByKey},
Expand Down
67 changes: 26 additions & 41 deletions ydb/library/yql/core/type_ann/type_ann_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2078,60 +2078,45 @@ namespace NTypeAnnImpl {
return IGraphTransformer::TStatus::Error;
}

if (1U == input->ChildrenSize()) {
output = input->HeadPtr();
return IGraphTransformer::TStatus::Repeat;
}

const TTypeAnnotationNode* firstType = input->Head().GetTypeAnn();
for (ui32 i = 0; i < input->ChildrenSize(); ++i) {
if (IsNull(*input->Child(i))) {
output = input->ChildPtr(i);
return IGraphTransformer::TStatus::Repeat;
if (!EnsurePersistable(*input->Child(i), ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
}

if (CanCompare<false>(firstType, input->Child(i)->GetTypeAnn()) == ECompareOptions::Uncomparable) {
ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()),
TStringBuilder() << "Uncomparable types: " << *firstType << " and " << *input->Child(i)->GetTypeAnn()));
return IGraphTransformer::TStatus::Error;
}
}

bool isOptional1;
const TDataExprType* dataType1;
if (!EnsureDataOrOptionalOfData(input->Head(), isOptional1, dataType1, ctx.Expr)) {
const auto commonItemType = CommonTypeForChildren(*input, ctx.Expr);
if (!commonItemType) {
return IGraphTransformer::TStatus::Error;
}

bool isSomeOptional = isOptional1;
for (ui32 index = 1; index < input->ChildrenSize(); ++index) {
bool isOptional2;
const TDataExprType* dataType2;
if (!EnsureDataOrOptionalOfData(*input->Child(index), isOptional2, dataType2, ctx.Expr)) {
return IGraphTransformer::TStatus::Error;
}
if (const auto status = ConvertChildrenToType(input, commonItemType, ctx.Expr); status != IGraphTransformer::TStatus::Ok) {
return status;
}

isSomeOptional = isSomeOptional || isOptional2;
const bool isLeftNumeric = IsDataTypeNumeric(dataType1->GetSlot());
const bool isRightNumeric = IsDataTypeNumeric(dataType2->GetSlot());
if (isLeftNumeric != isRightNumeric) {
ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), TStringBuilder() << "mismatch of data types: "
<< *input->Head().GetTypeAnn() << " != " << *input->Child(index)->GetTypeAnn()));
return IGraphTransformer::TStatus::Error;
}
const bool addTopLevelOptional = commonItemType->HasOptionalOrNull() && !commonItemType->IsOptionalOrNull();
const TTypeAnnotationNode* resultType = addTopLevelOptional ? ctx.Expr.MakeType<TOptionalExprType>(commonItemType) : commonItemType;

if (isLeftNumeric) {
auto commonTypeSlot = GetNumericDataTypeByLevel(Max(GetNumericDataTypeLevel(dataType1->GetSlot()),
GetNumericDataTypeLevel(dataType2->GetSlot())));
dataType1 = (commonTypeSlot == dataType1->GetSlot()) ? dataType1 : dataType2;
}
else {
if (!IsSameAnnotation(*dataType1, *dataType2)) {
ctx.Expr.AddError(TIssue(ctx.Expr.GetPosition(input->Pos()), TStringBuilder() << "mismatch of data types: "
<< *input->Head().GetTypeAnn() << " != " << *input->Child(index)->GetTypeAnn()));
return IGraphTransformer::TStatus::Error;
}
}
if (1U == input->ChildrenSize()) {
output = ctx.Expr.WrapByCallableIf(addTopLevelOptional, "Just", input->HeadPtr());
return IGraphTransformer::TStatus::Repeat;
}

input->SetTypeAnn(dataType1);
if (isSomeOptional) {
input->SetTypeAnn(ctx.Expr.MakeType<TOptionalExprType>(input->GetTypeAnn()));
for (ui32 i = 0; i < input->ChildrenSize(); ++i) {
if (input->Child(i)->GetTypeAnn()->HasNull()) {
output = ctx.Expr.NewCallable(input->Child(i)->Pos(), "Nothing", { ExpandType(input->Child(i)->Pos(), *resultType, ctx.Expr) });
return IGraphTransformer::TStatus::Repeat;
}
}

input->SetUnorderedChildren();
input->SetTypeAnn(resultType);
return IGraphTransformer::TStatus::Ok;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -702,9 +702,9 @@
],
"test.test[Builtins-MinMaxOverOptional-Debug]": [
{
"checksum": "45888fd52629038c062c8308f2d4623c",
"size": 2147,
"uri": "https://{canondata_backend}/1923547/974828dc20985de13b5f598a57260572224c273b/resource.tar.gz#test.test_Builtins-MinMaxOverOptional-Debug_/opt.yql"
"checksum": "f4383e77cd225f4eab470f7545c69a78",
"size": 2027,
"uri": "https://{canondata_backend}/1937424/a08fc34c802bc8ed51d545129d5e7e77312a755d/resource.tar.gz#test.test_Builtins-MinMaxOverOptional-Debug_/opt.yql"
},
{
"checksum": "99a23a80241cdd0057583910f9155f61",
Expand Down
22 changes: 22 additions & 0 deletions ydb/library/yql/tests/sql/dq_file/part7/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -763,6 +763,28 @@
}
],
"test.test[expr-literal_strings-default.txt-Results]": [],
"test.test[expr-minmax_for_complex_types-default.txt-Analyze]": [
{
"checksum": "b4dd508a329723c74293d80f0278c705",
"size": 505,
"uri": "https://{canondata_backend}/1937001/48471e6d9c7324ace71b9be0fd74072f683de033/resource.tar.gz#test.test_expr-minmax_for_complex_types-default.txt-Analyze_/plan.txt"
}
],
"test.test[expr-minmax_for_complex_types-default.txt-Debug]": [
{
"checksum": "f9c01f8b310f300be1b4d8b3c064a692",
"size": 1152,
"uri": "https://{canondata_backend}/1784826/ca2edc7f1a2a5c0080870544a7f83cdc18543a05/resource.tar.gz#test.test_expr-minmax_for_complex_types-default.txt-Debug_/opt.yql_patched"
}
],
"test.test[expr-minmax_for_complex_types-default.txt-Plan]": [
{
"checksum": "b4dd508a329723c74293d80f0278c705",
"size": 505,
"uri": "https://{canondata_backend}/1937001/48471e6d9c7324ace71b9be0fd74072f683de033/resource.tar.gz#test.test_expr-minmax_for_complex_types-default.txt-Plan_/plan.txt"
}
],
"test.test[expr-minmax_for_complex_types-default.txt-Results]": [],
"test.test[expr-non_persistable_group_by_having_some_fail--Results]": [
{
"uri": "file://test.test_expr-non_persistable_group_by_having_some_fail--Results_/extracted"
Expand Down
14 changes: 14 additions & 0 deletions ydb/library/yql/tests/sql/hybrid_file/part5/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,20 @@
"uri": "https://{canondata_backend}/1936947/a5f83e5d38179c14126d53519dc062cef98113ec/resource.tar.gz#test.test_expr-longint_builtins-default.txt-Plan_/plan.txt"
}
],
"test.test[expr-minmax_for_complex_types-default.txt-Debug]": [
{
"checksum": "85147082d7caa6e433b9f2866afa41d8",
"size": 1151,
"uri": "https://{canondata_backend}/1881367/db84cf65a0fe23688d717b9be3cef15f9249c865/resource.tar.gz#test.test_expr-minmax_for_complex_types-default.txt-Debug_/opt.yql_patched"
}
],
"test.test[expr-minmax_for_complex_types-default.txt-Plan]": [
{
"checksum": "b4dd508a329723c74293d80f0278c705",
"size": 505,
"uri": "https://{canondata_backend}/1937424/6ef1f652a39663221b6531b56b64227e1ee24197/resource.tar.gz#test.test_expr-minmax_for_complex_types-default.txt-Plan_/plan.txt"
}
],
"test.test[expr-replace_member-default.txt-Debug]": [
{
"checksum": "f7c607a8eebd9aae9166d43b79cbf119",
Expand Down
14 changes: 14 additions & 0 deletions ydb/library/yql/tests/sql/sql2yql/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -5802,6 +5802,13 @@
"uri": "https://{canondata_backend}/1784117/d56ae82ad9d30397a41490647be1bd2124718f98/resource.tar.gz#test_sql2yql.test_expr-many_opt_comp_/sql.yql"
}
],
"test_sql2yql.test[expr-minmax_for_complex_types]": [
{
"checksum": "aaf854dd05cb093b512c206f579cae35",
"size": 3840,
"uri": "https://{canondata_backend}/1942671/92396c368500f27bb9a26ee02762281c902efb4e/resource.tar.gz#test_sql2yql.test_expr-minmax_for_complex_types_/sql.yql"
}
],
"test_sql2yql.test[expr-non_persistable_group_by_column_fail]": [
{
"checksum": "392fb0610bd8fc598098ae660979a3e0",
Expand Down Expand Up @@ -24065,6 +24072,13 @@
"uri": "https://{canondata_backend}/1880306/64654158d6bfb1289c66c626a8162239289559d0/resource.tar.gz#test_sql_format.test_expr-many_opt_comp_/formatted.sql"
}
],
"test_sql_format.test[expr-minmax_for_complex_types]": [
{
"checksum": "8b0b147ac68f7815403586781d1066ce",
"size": 772,
"uri": "https://{canondata_backend}/1942671/92396c368500f27bb9a26ee02762281c902efb4e/resource.tar.gz#test_sql_format.test_expr-minmax_for_complex_types_/formatted.sql"
}
],
"test_sql_format.test[expr-non_persistable_group_by_column_fail]": [
{
"checksum": "fac6ed789b7f73290e6e7136f0770871",
Expand Down
19 changes: 19 additions & 0 deletions ydb/library/yql/tests/sql/suites/expr/minmax_for_complex_types.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
-- Exercises MIN_OF / MAX_OF over non-data arguments (tuples and lists).
-- Cases covered, in order:
--   * three tuples whose first elements mix literal types (1u vs 1);
--   * three tuples with distinct first elements (0, 1, 2);
--   * two tuples with equal first elements where one second element is 1/0;
--   * two identical tuples whose second element is 1/0;
--   * a single tuple argument;
--   * two lists of different lengths.
-- NOTE(review): 1/0 is presumably used to obtain a NULL tuple element so that
-- nullable components are covered — confirm against YQL division semantics.
select
max_of(AsTuple(1u, 2), AsTuple(1, 1/0), AsTuple(1, 3)) as max_tuple,
min_of(AsTuple(1u, 2), AsTuple(1, 1/0), AsTuple(1, 3)) as min_tuple,

min_of(AsTuple(0, 1/0), AsTuple(1, 1/0), AsTuple(2, 1/0)) as min_tuple1,
max_of(AsTuple(0, 1/0), AsTuple(1, 1/0), AsTuple(2, 1/0)) as max_tuple1,

min_of(AsTuple(1, 1/0), AsTuple(1, 1)) as min_tuple2,
max_of(AsTuple(1, 1/0), AsTuple(1, 1)) as max_tuple2,

min_of(AsTuple(1, 1/0), AsTuple(1, 1/0)) as min_tuple3,
max_of(AsTuple(1, 1/0), AsTuple(1, 1/0)) as max_tuple3,

min_of(AsTuple(1, 1/0)) as min_tuple4,
max_of(AsTuple(1, 1/0)) as max_tuple4,


min_of(AsList(1, 2, 3), AsList(1, 1)) as min_list,
max_of(AsList(1, 2, 3), AsList(1, 1)) as max_list,
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,27 @@
"uri": "https://{canondata_backend}/1937001/37554cdd914aa454d296c9271a21521d48531395/resource.tar.gz#test.test_expr-literal_strings-default.txt-Results_/results.txt"
}
],
"test.test[expr-minmax_for_complex_types-default.txt-Debug]": [
{
"checksum": "c605d61ce59da0979c28c41c40be7ff1",
"size": 1080,
"uri": "https://{canondata_backend}/1871182/3e71c42f1b768f88bda8dbcb5531384a79c9053a/resource.tar.gz#test.test_expr-minmax_for_complex_types-default.txt-Debug_/opt.yql"
}
],
"test.test[expr-minmax_for_complex_types-default.txt-Plan]": [
{
"checksum": "b4dd508a329723c74293d80f0278c705",
"size": 505,
"uri": "https://{canondata_backend}/1937424/156b9d367c9bcbb4e60a3a94eaf810932e417ae3/resource.tar.gz#test.test_expr-minmax_for_complex_types-default.txt-Plan_/plan.txt"
}
],
"test.test[expr-minmax_for_complex_types-default.txt-Results]": [
{
"checksum": "6ef70ddefe7a127998a7444f57da41cf",
"size": 12651,
"uri": "https://{canondata_backend}/1871182/3e71c42f1b768f88bda8dbcb5531384a79c9053a/resource.tar.gz#test.test_expr-minmax_for_complex_types-default.txt-Results_/results.txt"
}
],
"test.test[expr-non_persistable_group_by_having_some_fail--Debug]": [],
"test.test[expr-non_persistable_group_by_having_some_fail--Plan]": [],
"test.test[expr-non_persistable_group_by_having_some_fail--Results]": [
Expand Down

0 comments on commit 6b2d235

Please sign in to comment.