Skip to content

Commit

Permalink
dq: add cbo cost functions (ydb-platform#7617)
Browse files Browse the repository at this point in the history
  • Loading branch information
yumkam authored Aug 14, 2024
1 parent 77fc854 commit d1f17fc
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 10 deletions.
70 changes: 69 additions & 1 deletion ydb/library/yql/providers/dq/opt/logical_optimize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,74 @@ bool IsStreamLookup(const TCoEquiJoinTuple& joinTuple) {

}

/**
 * Provider context handed to the cost-based optimizer by the DQ provider.
 *
 * Overrides two hooks of NYql::TBaseProviderContext:
 *   - IsJoinApplicable: decides whether a join algorithm may be used for a join.
 *   - ComputeJoinCost:  estimates the cost of a join for a given algorithm.
 */
struct TDqCBOProviderContext : public NYql::TBaseProviderContext {
    /**
     * @param typeCtx Type annotation context; held by reference, so it must
     *                outlive this object.
     * @param config  DQ configuration; IsJoinApplicable reads HashJoinMode from it.
     */
    TDqCBOProviderContext(TTypeAnnotationContext& typeCtx, const TDqConfiguration::TPtr& config)
        : NYql::TBaseProviderContext()
        , Config(config)
        , TypesCtx(typeCtx)
    {
    }

    /// Returns true when `joinAlgo` may be used for a join of kind `joinKind`.
    bool IsJoinApplicable(
        const std::shared_ptr<NYql::IBaseOptimizerNode>& left,
        const std::shared_ptr<NYql::IBaseOptimizerNode>& right,
        const std::set<std::pair<NYql::NDq::TJoinColumn, NYql::NDq::TJoinColumn>>& joinConditions,
        const TVector<TString>& leftJoinKeys,
        const TVector<TString>& rightJoinKeys,
        NYql::EJoinAlgoType joinAlgo,
        NYql::EJoinKind joinKind) override;

    /// Returns the estimated cost of joining the two inputs with `joinAlgo`.
    double ComputeJoinCost(
        const NYql::TOptimizerStatistics& leftStats,
        const NYql::TOptimizerStatistics& rightStats,
        const double outputRows,
        const double outputByteSize,
        NYql::EJoinAlgoType joinAlgo) const override;

    TDqConfiguration::TPtr Config;      ///< DQ settings (source of HashJoinMode).
    TTypeAnnotationContext& TypesCtx;   ///< Non-owning reference to the type annotation context.
};


/**
 * Decides whether the given join algorithm may be used.
 *
 * Only `joinAlgo`, `joinKind` and the configured HashJoinMode are consulted;
 * the input nodes, join conditions and key lists are ignored.
 *
 *   - GraceJoin: always applicable.
 *   - MapJoin:   not applicable to OuterJoin or Exclusion joins; otherwise
 *                applicable only when HashJoinMode is Off (also the fallback
 *                when the setting is absent) or Map.
 *   - any other algorithm: not applicable.
 */
bool TDqCBOProviderContext::IsJoinApplicable(
    const std::shared_ptr<NYql::IBaseOptimizerNode>& left,
    const std::shared_ptr<NYql::IBaseOptimizerNode>& right,
    const std::set<std::pair<NYql::NDq::TJoinColumn, NYql::NDq::TJoinColumn>>& joinConditions,
    const TVector<TString>& leftJoinKeys,
    const TVector<TString>& rightJoinKeys,
    NYql::EJoinAlgoType joinAlgo,
    NYql::EJoinKind joinKind)
{
    Y_UNUSED(left);
    Y_UNUSED(right);
    Y_UNUSED(joinConditions);
    Y_UNUSED(leftJoinKeys);
    Y_UNUSED(rightJoinKeys);

    if (joinAlgo == EJoinAlgoType::GraceJoin) {
        return true;
    }

    if (joinAlgo != EJoinAlgoType::MapJoin) {
        return false;
    }

    // From here on we are deciding for MapJoin only.
    const bool unsupportedKind =
        joinKind == EJoinKind::OuterJoin || joinKind == EJoinKind::Exclusion;
    if (unsupportedKind) {
        return false;
    }

    // The configuration is consulted only after the join kind has been accepted.
    const auto configuredMode = Config->HashJoinMode.Get().GetOrElse(EHashJoinMode::Off);
    return configuredMode == EHashJoinMode::Off || configuredMode == EHashJoinMode::Map;
}


/**
 * Estimates the cost of a join as
 *
 *     1.5 * (leftRows + w * rightRows + outputRows)
 *
 * where the right-side weight `w` depends on the algorithm:
 * 1.8 for MapJoin and 2.0 for GraceJoin.
 *
 * `outputByteSize` does not take part in the estimate.
 * Any other algorithm fails the Y_ENSURE check.
 */
double TDqCBOProviderContext::ComputeJoinCost(
    const TOptimizerStatistics& leftStats,
    const TOptimizerStatistics& rightStats,
    const double outputRows,
    const double outputByteSize,
    EJoinAlgoType joinAlgo) const
{
    Y_UNUSED(outputByteSize);

    double rightSideWeight = 0;
    switch (joinAlgo) {
        case EJoinAlgoType::MapJoin:
            rightSideWeight = 1.8;
            break;
        case EJoinAlgoType::GraceJoin:
            rightSideWeight = 2.0;
            break;
        default:
            Y_ENSURE(false, "Illegal join type encountered");
            return 0;
    }

    return 1.5 * (leftStats.Nrows + rightSideWeight * rightStats.Nrows + outputRows);
}


class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase {
public:
TDqsLogicalOptProposalTransformer(TTypeAnnotationContext* typeCtx, const TDqConfiguration::TPtr& config)
Expand Down Expand Up @@ -206,7 +274,7 @@ class TDqsLogicalOptProposalTransformer : public TOptimizeTransformerBase {
};

std::unique_ptr<IOptimizerNew> opt;
TBaseProviderContext pctx;
TDqCBOProviderContext pctx(TypesCtx, Config);

switch (TypesCtx.CostBasedOptimizer) {
case ECostBasedOptimizerType::Native:
Expand Down
18 changes: 9 additions & 9 deletions ydb/library/yql/tests/sql/dq_file/part17/canondata/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -755,23 +755,23 @@
"test.test[dq-blacklisted_pragmas--Results]": [],
"test.test[dq-join_cbo_native_3_tables--Analyze]": [
{
"checksum": "94e6af2e865eab35e76cc9963452ad0d",
"size": 13889,
"uri": "https://{canondata_backend}/1924537/25b076e867ef14e000e544c6dba85dd8ef6e3620/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Analyze_/plan.txt"
"checksum": "90555f07378f801872485e6ac96dfd73",
"size": 12314,
"uri": "https://{canondata_backend}/1942278/d3f67196e7e0096e289743f5dbfd5dc2f990f9e6/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Analyze_/plan.txt"
}
],
"test.test[dq-join_cbo_native_3_tables--Debug]": [
{
"checksum": "fd20054511c7328de8f8c6c45539b48b",
"size": 5339,
"uri": "https://{canondata_backend}/1936273/7a32049e7d34640d0891b0eccadb21c671bd9ed5/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Debug_/opt.yql_patched"
"checksum": "91570a2f667516ba1f3f28642698441f",
"size": 4802,
"uri": "https://{canondata_backend}/1942278/d3f67196e7e0096e289743f5dbfd5dc2f990f9e6/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Debug_/opt.yql_patched"
}
],
"test.test[dq-join_cbo_native_3_tables--Plan]": [
{
"checksum": "94e6af2e865eab35e76cc9963452ad0d",
"size": 13889,
"uri": "https://{canondata_backend}/1924537/25b076e867ef14e000e544c6dba85dd8ef6e3620/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Plan_/plan.txt"
"checksum": "90555f07378f801872485e6ac96dfd73",
"size": 12314,
"uri": "https://{canondata_backend}/1942278/d3f67196e7e0096e289743f5dbfd5dc2f990f9e6/resource.tar.gz#test.test_dq-join_cbo_native_3_tables--Plan_/plan.txt"
}
],
"test.test[dq-join_cbo_native_3_tables--Results]": [
Expand Down

0 comments on commit d1f17fc

Please sign in to comment.