From 51303583c5264c2436b5cf48d60bc39a74b23d15 Mon Sep 17 00:00:00 2001 From: Hor911 Date: Thu, 13 Jun 2024 21:48:21 +0300 Subject: [PATCH] SQL hints map() and grace() (#5252) --- ydb/core/kqp/opt/physical/kqp_opt_phy.cpp | 11 +++--- ydb/library/yql/dq/opt/dq_opt_join.cpp | 25 +++++-------- ydb/library/yql/dq/opt/dq_opt_join.h | 4 +-- ydb/library/yql/dq/opt/dq_opt_phy.cpp | 21 +++++------ ydb/library/yql/sql/v1/join.cpp | 4 +++ ydb/library/yql/sql/v1/source.h | 4 ++- ydb/library/yql/sql/v1/sql_select.cpp | 4 +++ .../sql/dq_file/part17/canondata/result.json | 18 +++++----- .../sql/dq_file/part5/canondata/result.json | 36 +++++++++---------- .../sql/dq_file/part9/canondata/result.json | 18 +++++----- 10 files changed, 70 insertions(+), 75 deletions(-) diff --git a/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp b/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp index d3211c8c3df4..a28771a8f9da 100644 --- a/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp +++ b/ydb/core/kqp/opt/physical/kqp_opt_phy.cpp @@ -25,11 +25,10 @@ using TStatus = IGraphTransformer::TStatus; class TKqpPhysicalOptTransformer : public TOptimizeTransformerBase { public: - TKqpPhysicalOptTransformer(TTypeAnnotationContext& typesCtx, const TIntrusivePtr& kqpCtx, const TKikimrConfiguration::TPtr& config) + TKqpPhysicalOptTransformer(TTypeAnnotationContext& typesCtx, const TIntrusivePtr& kqpCtx) : TOptimizeTransformerBase(nullptr, NYql::NLog::EComponent::ProviderKqp, {}) , TypesCtx(typesCtx) , KqpCtx(*kqpCtx) - , Config(config) { #define HNDL(name) "KqpPhysical-"#name, Hndl(&TKqpPhysicalOptTransformer::name) AddHandler(0, &TDqSourceWrap::Match, HNDL(BuildStageWithSourceWrap)); @@ -430,9 +429,8 @@ class TKqpPhysicalOptTransformer : public TOptimizeTransformerBase { // TODO: Allow push to left stage for data queries. // It is now possible as we don't use datashard transactions for reads in data queries. bool pushLeftStage = !KqpCtx.IsDataQuery() && AllowFuseJoinInputs(node); - bool useCBO = Config->CostBasedOptimizationLevel.Get().GetOrElse(TDqSettings::TDefault::CostBasedOptimizationLevel) == 3; TExprBase output = DqBuildJoin(node, ctx, optCtx, *getParents(), IsGlobal, - pushLeftStage, KqpCtx.Config->GetHashJoinMode(), useCBO + pushLeftStage, KqpCtx.Config->GetHashJoinMode() ); DumpAppliedRule("BuildJoin", node.Ptr(), output.Ptr(), ctx); return output; @@ -595,13 +593,12 @@ class TKqpPhysicalOptTransformer : public TOptimizeTransformerBase { private: TTypeAnnotationContext& TypesCtx; const TKqpOptimizeContext& KqpCtx; - const TKikimrConfiguration::TPtr& Config; }; TAutoPtr CreateKqpPhyOptTransformer(const TIntrusivePtr& kqpCtx, - NYql::TTypeAnnotationContext& typesCtx, const TKikimrConfiguration::TPtr& config) + NYql::TTypeAnnotationContext& typesCtx, const TKikimrConfiguration::TPtr&) { - return THolder(new TKqpPhysicalOptTransformer(typesCtx, kqpCtx, config)); + return THolder(new TKqpPhysicalOptTransformer(typesCtx, kqpCtx)); } } // namespace NKikimr::NKqp::NOpt diff --git a/ydb/library/yql/dq/opt/dq_opt_join.cpp b/ydb/library/yql/dq/opt/dq_opt_join.cpp index 92217210aad7..518474a77288 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_join.cpp @@ -122,6 +122,13 @@ TMaybe BuildDqJoin(const TCoEquiJoinTuple& joinTuple, auto options = joinTuple.Options(); auto linkSettings = GetEquiJoinLinkSettings(options.Ref()); YQL_ENSURE(linkSettings.JoinAlgo != EJoinAlgoType::StreamLookupJoin || typeCtx.StreamLookupJoin, "Unsupported join strategy: streamlookup"); + + if (linkSettings.JoinAlgo == EJoinAlgoType::MapJoin) { + mode = EHashJoinMode::Map; + } else if (linkSettings.JoinAlgo == EJoinAlgoType::GraceJoin) { + mode = EHashJoinMode::GraceAndSelf; + } + bool leftAny = linkSettings.LeftHints.contains("any"); bool rightAny = linkSettings.RightHints.contains("any"); @@ -1119,26 +1126,10 @@ TExprNode::TPtr ReplaceJoinOnSide(TExprNode::TPtr&& input, const TTypeAnnotation } -TExprBase DqBuildHashJoin(const TDqJoin& join, EHashJoinMode mode, bool useCBO, TExprContext& ctx, IOptimizationContext& optCtx) { +TExprBase DqBuildHashJoin(const TDqJoin& join, EHashJoinMode mode, TExprContext& ctx, IOptimizationContext& optCtx) { const auto joinType = join.JoinType().Value(); YQL_ENSURE(joinType != "Cross"sv); - if (useCBO) { - auto joinAlgo = FromString(join.JoinAlgo().StringValue()); - switch (joinAlgo) { - case EJoinAlgoType::LookupJoin: - case EJoinAlgoType::MapJoin: - mode = EHashJoinMode::Map; - break; - case EJoinAlgoType::GraceJoin: - mode = EHashJoinMode::GraceAndSelf; - break; - default: - break; - } - - } - const auto leftIn = join.LeftInput().Cast().Output(); const auto rightIn = join.RightInput().Cast().Output(); diff --git a/ydb/library/yql/dq/opt/dq_opt_join.h b/ydb/library/yql/dq/opt/dq_opt_join.h index 6568ecccb51f..43556212fe61 100644 --- a/ydb/library/yql/dq/opt/dq_opt_join.h +++ b/ydb/library/yql/dq/opt/dq_opt_join.h @@ -17,9 +17,9 @@ NNodes::TExprBase DqRewriteEquiJoin(const NNodes::TExprBase& node, EHashJoinMode NNodes::TExprBase DqBuildPhyJoin(const NNodes::TDqJoin& join, bool pushLeftStage, TExprContext& ctx, IOptimizationContext& optCtx); NNodes::TExprBase DqBuildJoin(const NNodes::TExprBase& node, TExprContext& ctx, - IOptimizationContext& optCtx, const TParentsMap& parentsMap, bool allowStageMultiUsage, bool pushLeftStage, EHashJoinMode hashJoin = EHashJoinMode::Off, bool useCBO = false); + IOptimizationContext& optCtx, const TParentsMap& parentsMap, bool allowStageMultiUsage, bool pushLeftStage, EHashJoinMode hashJoin = EHashJoinMode::Off); -NNodes::TExprBase DqBuildHashJoin(const NNodes::TDqJoin& join, EHashJoinMode mode, bool useCBO, TExprContext& ctx, IOptimizationContext& optCtx); +NNodes::TExprBase DqBuildHashJoin(const NNodes::TDqJoin& join, EHashJoinMode mode, TExprContext& ctx, IOptimizationContext& optCtx); NNodes::TExprBase DqBuildJoinDict(const NNodes::TDqJoin& join, TExprContext& ctx); diff --git a/ydb/library/yql/dq/opt/dq_opt_phy.cpp b/ydb/library/yql/dq/opt/dq_opt_phy.cpp index 0d3ff1b74b43..46788fed2daa 100644 --- a/ydb/library/yql/dq/opt/dq_opt_phy.cpp +++ b/ydb/library/yql/dq/opt/dq_opt_phy.cpp @@ -2655,7 +2655,7 @@ TMaybeNode DqFlipJoin(const TDqJoin& join, TExprContext& ctx) { TExprBase DqBuildJoin(const TExprBase& node, TExprContext& ctx, IOptimizationContext& optCtx, - const TParentsMap& parentsMap, bool allowStageMultiUsage, bool pushLeftStage, EHashJoinMode hashJoin, bool useCBO) + const TParentsMap& parentsMap, bool allowStageMultiUsage, bool pushLeftStage, EHashJoinMode hashJoin) { if (!node.Maybe()) { return node; @@ -2666,21 +2666,18 @@ TExprBase DqBuildJoin(const TExprBase& node, TExprContext& ctx, IOptimizationCon const bool leftIsUnionAll = join.LeftInput().Maybe().IsValid(); const bool rightIsUnionAll = join.RightInput().Maybe().IsValid(); + auto joinAlgo = FromString(join.JoinAlgo().StringValue()); + if (joinAlgo == EJoinAlgoType::MapJoin) { + hashJoin = EHashJoinMode::Map; + } else if (joinAlgo == EJoinAlgoType::GraceJoin) { + hashJoin = EHashJoinMode::GraceAndSelf; + } + bool useHashJoin = EHashJoinMode::Off != hashJoin && joinType != "Cross"sv && leftIsUnionAll && rightIsUnionAll; - if (useCBO) { - auto joinAlgo = FromString(join.JoinAlgo().StringValue()); - if (joinAlgo == EJoinAlgoType::MapJoin || joinAlgo == EJoinAlgoType::GraceJoin) { - useHashJoin = joinType != "Cross"sv && leftIsUnionAll && rightIsUnionAll; - } - else { - useHashJoin = false; - } - } - if (DqValidateJoinInputs(join.LeftInput(), join.RightInput(), parentsMap, allowStageMultiUsage)) { // pass } else if (DqValidateJoinInputs(join.RightInput(), join.LeftInput(), parentsMap, allowStageMultiUsage)) { @@ -2696,7 +2693,7 @@ TExprBase DqBuildJoin(const TExprBase& node, TExprContext& ctx, IOptimizationCon } if (useHashJoin) { - return DqBuildHashJoin(join, hashJoin, useCBO, ctx, optCtx); + return DqBuildHashJoin(join, hashJoin, ctx, optCtx); } if (joinType == "Full"sv || joinType == "Exclusion"sv) { diff --git a/ydb/library/yql/sql/v1/join.cpp b/ydb/library/yql/sql/v1/join.cpp index 8f025e131dea..b4edbed62793 100644 --- a/ydb/library/yql/sql/v1/join.cpp +++ b/ydb/library/yql/sql/v1/join.cpp @@ -503,6 +503,10 @@ class TEquiJoin: public TJoinBase { linkOptions = L(linkOptions, Q(Y(Q("forceSortedMerge")))); } else if (TJoinLinkSettings::EStrategy::StreamLookup == descr.LinkSettings.Strategy) { linkOptions = L(linkOptions, Q(Y(Q("forceStreamLookup")))); + } else if (TJoinLinkSettings::EStrategy::ForceMap == descr.LinkSettings.Strategy) { + linkOptions = L(linkOptions, Q(Y(Q("join_algo"), Q("MapJoin")))); + } else if (TJoinLinkSettings::EStrategy::ForceGrace == descr.LinkSettings.Strategy) { + linkOptions = L(linkOptions, Q(Y(Q("join_algo"), Q("GraceJoin")))); } if (leftAny) { linkOptions = L(linkOptions, Q(Y(Q("left"), Q("any")))); diff --git a/ydb/library/yql/sql/v1/source.h b/ydb/library/yql/sql/v1/source.h index 751fb0bf14af..a58402f1dc19 100644 --- a/ydb/library/yql/sql/v1/source.h +++ b/ydb/library/yql/sql/v1/source.h @@ -167,7 +167,9 @@ namespace NSQLTranslationV1 { enum class EStrategy { Default, SortedMerge, - StreamLookup + StreamLookup, + ForceMap, + ForceGrace }; EStrategy Strategy = EStrategy::Default; }; diff --git a/ydb/library/yql/sql/v1/sql_select.cpp b/ydb/library/yql/sql/v1/sql_select.cpp index 2700ba0e99ff..f6a77fbbe417 100644 --- a/ydb/library/yql/sql/v1/sql_select.cpp +++ b/ydb/library/yql/sql/v1/sql_select.cpp @@ -47,6 +47,10 @@ bool TSqlSelect::JoinOp(ISource* join, const TRule_join_source::TBlock3& block, newStrategy = TJoinLinkSettings::EStrategy::SortedMerge; } else if (canonizedName == "streamlookup") { newStrategy = TJoinLinkSettings::EStrategy::StreamLookup; + } else if (canonizedName == "map") { + newStrategy = TJoinLinkSettings::EStrategy::ForceMap; + } else if (canonizedName == "grace") { + newStrategy = TJoinLinkSettings::EStrategy::ForceGrace; } else { Ctx.Warning(hint.Pos, TIssuesIds::YQL_UNUSED_HINT) << "Unsupported join strategy: " << hint.Name; } diff --git a/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json index 2a0ef4ab8454..07cdabe70962 100644 --- a/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json @@ -733,23 +733,23 @@ "test.test[dq-blacklisted_pragmas--Results]": [], "test.test[dq-join_cbo_native_3_tables--Analyze]": [ { - "checksum": "90555f07378f801872485e6ac96dfd73", - "size": 12314, - "uri": "https://{canondata_backend}/1936273/1d5a5a67f967b4eae3979e08a19421fc7e07c120/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Analyze_/plan.txt" + "checksum": "94e6af2e865eab35e76cc9963452ad0d", + "size": 13889, + "uri": "https://{canondata_backend}/1924537/25b076e867ef14e000e544c6dba85dd8ef6e3620/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Analyze_/plan.txt" } ], "test.test[dq-join_cbo_native_3_tables--Debug]": [ { - "checksum": "6f59cd88e8a1433c350d9c657c47f8e7", - "size": 4756, - "uri": "https://{canondata_backend}/1936273/1d5a5a67f967b4eae3979e08a19421fc7e07c120/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Debug_/opt.yql_patched" + "checksum": "3c08870f1e21f80ad8c472110e5c3f5d", + "size": 5289, + "uri": "https://{canondata_backend}/1924537/25b076e867ef14e000e544c6dba85dd8ef6e3620/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Debug_/opt.yql_patched" } ], "test.test[dq-join_cbo_native_3_tables--Plan]": [ { - "checksum": "90555f07378f801872485e6ac96dfd73", - "size": 12314, - "uri": "https://{canondata_backend}/1936273/1d5a5a67f967b4eae3979e08a19421fc7e07c120/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Plan_/plan.txt" + "checksum": "94e6af2e865eab35e76cc9963452ad0d", + "size": 13889, + "uri": "https://{canondata_backend}/1924537/25b076e867ef14e000e544c6dba85dd8ef6e3620/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Plan_/plan.txt" } ], "test.test[dq-join_cbo_native_3_tables--Results]": [ diff --git a/ydb/library/yql/tests/sql/dq_file/part5/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part5/canondata/result.json index daf8d5a7cc86..c9bce0e6d28c 100644 --- a/ydb/library/yql/tests/sql/dq_file/part5/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part5/canondata/result.json @@ -1464,45 +1464,45 @@ ], "test.test[join-join_left_cbo--Analyze]": [ { - "checksum": "bed6085b162c0c5d323d2cba86327e91", - "size": 6907, - "uri": "https://{canondata_backend}/1936997/3b4a22ca6dc6f7fdc9a243be5c610aa02e850d48/resource.tar.gz#test.test_join-join_left_cbo--Analyze_/plan.txt" + "checksum": "60f726fee0973238b3adf2fe55835186", + "size": 7432, + "uri": "https://{canondata_backend}/1942100/50ef34247500569eb5a4a2bd9f3afffdda4ff62e/resource.tar.gz#test.test_join-join_left_cbo--Analyze_/plan.txt" } ], "test.test[join-join_left_cbo--Debug]": [ { - "checksum": "53b7fd938ee1fba4deec3ec69af4e6b8", - "size": 2595, - "uri": "https://{canondata_backend}/1942100/b7c3148d85bada3dcb5f80c113a4345cbf0e421e/resource.tar.gz#test.test_join-join_left_cbo--Debug_/opt.yql_patched" + "checksum": "d1792965a95853a2b1949ba40f647ca5", + "size": 2747, + "uri": "https://{canondata_backend}/1942100/50ef34247500569eb5a4a2bd9f3afffdda4ff62e/resource.tar.gz#test.test_join-join_left_cbo--Debug_/opt.yql_patched" } ], "test.test[join-join_left_cbo--Plan]": [ { - "checksum": "bed6085b162c0c5d323d2cba86327e91", - "size": 6907, - "uri": "https://{canondata_backend}/1936997/3b4a22ca6dc6f7fdc9a243be5c610aa02e850d48/resource.tar.gz#test.test_join-join_left_cbo--Plan_/plan.txt" + "checksum": "60f726fee0973238b3adf2fe55835186", + "size": 7432, + "uri": "https://{canondata_backend}/1942100/50ef34247500569eb5a4a2bd9f3afffdda4ff62e/resource.tar.gz#test.test_join-join_left_cbo--Plan_/plan.txt" } ], "test.test[join-join_left_cbo--Results]": [], "test.test[join-join_right_cbo--Analyze]": [ { - "checksum": "376f7ae8049d86ef3d5f3f3418e059bd", - "size": 6907, - "uri": "https://{canondata_backend}/1936997/3b4a22ca6dc6f7fdc9a243be5c610aa02e850d48/resource.tar.gz#test.test_join-join_right_cbo--Analyze_/plan.txt" + "checksum": "2354227a93396a355919ebdb23ea7869", + "size": 7432, + "uri": "https://{canondata_backend}/1942100/50ef34247500569eb5a4a2bd9f3afffdda4ff62e/resource.tar.gz#test.test_join-join_right_cbo--Analyze_/plan.txt" } ], "test.test[join-join_right_cbo--Debug]": [ { - "checksum": "7d9bd70a94f140c7fa48c78d6ba901e7", - "size": 2595, - "uri": "https://{canondata_backend}/1942100/b7c3148d85bada3dcb5f80c113a4345cbf0e421e/resource.tar.gz#test.test_join-join_right_cbo--Debug_/opt.yql_patched" + "checksum": "e6f770b871662a4dfcf845ddc66acbc2", + "size": 2747, + "uri": "https://{canondata_backend}/1942100/50ef34247500569eb5a4a2bd9f3afffdda4ff62e/resource.tar.gz#test.test_join-join_right_cbo--Debug_/opt.yql_patched" } ], "test.test[join-join_right_cbo--Plan]": [ { - "checksum": "376f7ae8049d86ef3d5f3f3418e059bd", - "size": 6907, - "uri": "https://{canondata_backend}/1936997/3b4a22ca6dc6f7fdc9a243be5c610aa02e850d48/resource.tar.gz#test.test_join-join_right_cbo--Plan_/plan.txt" + "checksum": "2354227a93396a355919ebdb23ea7869", + "size": 7432, + "uri": "https://{canondata_backend}/1942100/50ef34247500569eb5a4a2bd9f3afffdda4ff62e/resource.tar.gz#test.test_join-join_right_cbo--Plan_/plan.txt" } ], "test.test[join-join_right_cbo--Results]": [], diff --git a/ydb/library/yql/tests/sql/dq_file/part9/canondata/result.json b/ydb/library/yql/tests/sql/dq_file/part9/canondata/result.json index 017cef194311..0c4898021aa9 100644 --- a/ydb/library/yql/tests/sql/dq_file/part9/canondata/result.json +++ b/ydb/library/yql/tests/sql/dq_file/part9/canondata/result.json @@ -1130,23 +1130,23 @@ "test.test[join-inner_grouped--Results]": [], "test.test[join-join_cbo_3_tables--Analyze]": [ { - "checksum": "c6971c40f475171444c47b0a048c94ea", - "size": 12313, - "uri": "https://{canondata_backend}/1871002/c040fe83a73e26552347f2cdc9c9be9e70a9f948/resource.tar.gz#test.test_join-join_cbo_3_tables--Analyze_/plan.txt" + "checksum": "ea10daa84032188803becbad8e320140", + "size": 13888, + "uri": "https://{canondata_backend}/1937429/6f0b11a050cc6c6080ee22988ff3362313e9fdca/resource.tar.gz#test.test_join-join_cbo_3_tables--Analyze_/plan.txt" } ], "test.test[join-join_cbo_3_tables--Debug]": [ { - "checksum": "b238426908adc70741316a341299b1aa", - "size": 4646, - "uri": "https://{canondata_backend}/1871182/e74dc45050363faa345feb7a557afc518d879b69/resource.tar.gz#test.test_join-join_cbo_3_tables--Debug_/opt.yql_patched" + "checksum": "9eeae6a6ae72ad4a1d0fb813144ba0b3", + "size": 5036, + "uri": "https://{canondata_backend}/1937429/6f0b11a050cc6c6080ee22988ff3362313e9fdca/resource.tar.gz#test.test_join-join_cbo_3_tables--Debug_/opt.yql_patched" } ], "test.test[join-join_cbo_3_tables--Plan]": [ { - "checksum": "c6971c40f475171444c47b0a048c94ea", - "size": 12313, - "uri": "https://{canondata_backend}/1871002/c040fe83a73e26552347f2cdc9c9be9e70a9f948/resource.tar.gz#test.test_join-join_cbo_3_tables--Plan_/plan.txt" + "checksum": "ea10daa84032188803becbad8e320140", + "size": 13888, + "uri": "https://{canondata_backend}/1937429/6f0b11a050cc6c6080ee22988ff3362313e9fdca/resource.tar.gz#test.test_join-join_cbo_3_tables--Plan_/plan.txt" } ], "test.test[join-join_cbo_3_tables--Results]": [],