diff --git a/src/jrd/RecordSourceNodes.cpp b/src/jrd/RecordSourceNodes.cpp index 47367b41dce..b49868643ed 100644 --- a/src/jrd/RecordSourceNodes.cpp +++ b/src/jrd/RecordSourceNodes.cpp @@ -52,6 +52,167 @@ static void genDeliverUnmapped(CompilerScratch* csb, const BoolExprNodeStack& pa static ValueExprNode* resolveUsingField(DsqlCompilerScratch* dsqlScratch, const MetaName& name, ValueListNode* list, const FieldNode* flawedNode, const TEXT* side, dsql_ctx*& ctx); +namespace +{ + // Search through the list of ANDed booleans to find comparisons + // referring streams of parent select expressions. + // Extract those booleans and return them to the caller. + + bool findDependentBooleans(CompilerScratch* csb, + const StreamList& rseStreams, + BoolExprNode** parentBoolean, + BoolExprNodeStack& booleanStack) + { + const auto boolean = *parentBoolean; + + const auto binaryNode = nodeAs(boolean); + if (binaryNode && binaryNode->blrOp == blr_and) + { + const bool found1 = findDependentBooleans(csb, rseStreams, + binaryNode->arg1.getAddress(), booleanStack); + const bool found2 = findDependentBooleans(csb, rseStreams, + binaryNode->arg2.getAddress(), booleanStack); + + if (!binaryNode->arg1 && !binaryNode->arg2) + *parentBoolean = nullptr; + else if (!binaryNode->arg1) + *parentBoolean = binaryNode->arg2; + else if (!binaryNode->arg2) + *parentBoolean = binaryNode->arg1; + + return (found1 || found2); + } + + if (const auto cmpNode = nodeAs(boolean)) + { + if (cmpNode->blrOp == blr_eql || cmpNode->blrOp == blr_equiv) + { + SortedStreamList streams; + cmpNode->collectStreams(streams); + + for (const auto stream : streams) + { + if (rseStreams.exist(stream)) + { + booleanStack.push(boolean); + *parentBoolean = nullptr; + return true; + } + } + } + } + + return false; + } + + // Search through the list of ANDed booleans to find correlated EXISTS/IN sub-queries. + // They are candidates to be converted into semi- or anti-joins. 
+ + bool findPossibleJoins(CompilerScratch* csb, + const StreamList& rseStreams, + BoolExprNode** parentBoolean, + RecordSourceNodeStack& rseStack, + BoolExprNodeStack& booleanStack) + { + auto boolNode = *parentBoolean; + + const auto binaryNode = nodeAs(boolNode); + if (binaryNode && binaryNode->blrOp == blr_and) + { + const bool found1 = findPossibleJoins(csb, rseStreams, + binaryNode->arg1.getAddress(), rseStack, booleanStack); + const bool found2 = findPossibleJoins(csb, rseStreams, + binaryNode->arg2.getAddress(), rseStack, booleanStack); + + if (!binaryNode->arg1 && !binaryNode->arg2) + *parentBoolean = nullptr; + else if (!binaryNode->arg1) + *parentBoolean = binaryNode->arg2; + else if (!binaryNode->arg2) + *parentBoolean = binaryNode->arg1; + + return (found1 || found2); + } + + const auto rseNode = nodeAs(boolNode); + // Both EXISTS (blr_any) and IN (blr_ansi_any) sub-queries are handled + if (rseNode && (rseNode->blrOp == blr_any || rseNode->blrOp == blr_ansi_any)) + { + auto rse = rseNode->rse; + fb_assert(rse && (rse->flags & RseNode::FLAG_SUB_QUERY)); + + if (rse->rse_boolean && rse->rse_jointype == blr_inner && + !rse->rse_first && !rse->rse_skip && !rse->rse_plan) + { + // Find booleans convertable into semi-joins + + StreamList streams; + rse->computeRseStreams(streams); + + BoolExprNodeStack booleans; + if (findDependentBooleans(csb, rseStreams, + rse->rse_boolean.getAddress(), + booleans)) + { + // Compose the conjunct boolean + + fb_assert(booleans.hasData()); + auto boolean = booleans.pop(); + while (booleans.hasData()) + { + const auto andNode = FB_NEW_POOL(csb->csb_pool) + BinaryBoolNode(csb->csb_pool, blr_and); + andNode->arg1 = boolean; + andNode->arg2 = booleans.pop(); + boolean = andNode; + } + + // Ensure that no external references are left inside the subquery. + // If so, mark the RSE as joined and add it to the stack. 
+ + SortedStreamList streams; + rse->collectStreams(streams); + + bool dependent = false; + for (const auto stream : streams) + { + if (rseStreams.exist(stream)) + { + dependent = true; + break; + } + } + + if (!dependent) + { + rse->flags &= ~RseNode::FLAG_SUB_QUERY; + rse->flags |= RseNode::FLAG_SEMI_JOINED; + rseStack.push(rse); + booleanStack.push(boolean); + *parentBoolean = nullptr; + return true; + } + + // Otherwise, restore the original sub-query by adding + // the collected booleans back to the RSE. + + if (rse->rse_boolean) + { + const auto andNode = FB_NEW_POOL(csb->csb_pool) + BinaryBoolNode(csb->csb_pool, blr_and); + andNode->arg1 = boolean; + andNode->arg2 = rse->rse_boolean; + boolean = andNode; + } + + rse->rse_boolean = boolean; + } + } + } + + return false; + } +} //-------------------- @@ -2992,6 +3153,9 @@ RseNode* RseNode::pass1(thread_db* tdbb, CompilerScratch* csb) { SET_TDBB(tdbb); + if (const auto newRse = processPossibleJoins(tdbb, csb)) + return newRse->pass1(tdbb, csb); + // for scoping purposes, maintain a stack of RseNode's which are // currently being parsed; if there are none on the stack as // yet, mark the RseNode as variant to make sure that statement- @@ -3097,6 +3261,12 @@ RseNode* RseNode::pass1(thread_db* tdbb, CompilerScratch* csb) void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse, BoolExprNode** boolean, RecordSourceNodeStack& stack) { + if (const auto newRse = processPossibleJoins(tdbb, csb)) + { + newRse->pass1Source(tdbb, csb, rse, boolean, stack); + return; + } + if (rse_jointype != blr_inner) { // Check whether any of the upper level booleans (those belonging to the WHERE clause) @@ -3150,7 +3320,7 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse, } } - // in the case of an RseNode, it is possible that a new RseNode will be generated, + // In the case of an RseNode, it is possible that a new RseNode will be generated, // so wait to process the source before we 
push it on the stack (bug 8039) // The addition of the JOIN syntax for specifying inner joins causes an @@ -3158,7 +3328,7 @@ void RseNode::pass1Source(thread_db* tdbb, CompilerScratch* csb, RseNode* rse, // where we are just trying to inner join more than 2 streams. If possible, // try to flatten the tree out before we go any further. - if (!isLateral() && + if (!isLateral() && !isSemiJoined() && rse->rse_jointype == blr_inner && rse_jointype == blr_inner && !rse_sorted && !rse_projection && @@ -3263,11 +3433,11 @@ RecordSource* RseNode::compile(thread_db* tdbb, Optimizer* opt, bool innerSubStr StreamStateHolder stateHolder(csb, opt->getOuterStreams()); - if (opt->isLeftJoin() || isLateral()) + if (opt->isLeftJoin() || isLateral() || isSemiJoined()) { stateHolder.activate(); - if (opt->isLeftJoin()) + if (opt->isLeftJoin() || isSemiJoined()) { // Push all conjuncts except "missing" ones (e.g. IS NULL) for (auto iter = opt->getConjuncts(false, true); iter.hasData(); ++iter) @@ -3290,6 +3460,87 @@ RecordSource* RseNode::compile(thread_db* tdbb, Optimizer* opt, bool innerSubStr return opt->compile(this, &conjunctStack); } +RseNode* RseNode::processPossibleJoins(thread_db* tdbb, CompilerScratch* csb) +{ + if (rse_jointype != blr_inner || !rse_boolean || rse_plan) + return nullptr; + + // If the sub-query is nested inside the other sub-query which wasn't converted into semi-join, + // it makes no sense to apply a semi-join at the deeper levels, as a sub-query is expected + // to be executed repeatedly. + // This is a temporary fix until nested loop semi-joins are allowed by the optimizer. 
+ + if (flags & FLAG_SUB_QUERY) + return nullptr; + + for (const auto node : csb->csb_current_nodes) + { + if (const auto rse = nodeAs(node)) + { + if (rse->flags & FLAG_SUB_QUERY) + return nullptr; + } + } + + RecordSourceNodeStack rseStack; + BoolExprNodeStack booleanStack; + + // Find possibly joinable sub-queries + + StreamList rseStreams; + computeRseStreams(rseStreams); + + if (!findPossibleJoins(csb, rseStreams, rse_boolean.getAddress(), rseStack, booleanStack)) + return nullptr; + + fb_assert(rseStack.hasData() && booleanStack.hasData()); + fb_assert(rseStack.getCount() == booleanStack.getCount()); + + // Create joins between the original node and detected joinable nodes. + // Preserve FIRST/SKIP nodes at their original position, i.e. outside semi-joins. + + const auto first = rse_first; + rse_first = nullptr; + + const auto skip = rse_skip; + rse_skip = nullptr; + + const auto orgFlags = flags; + flags = 0; + + auto rse = this; + while (rseStack.hasData()) + { + const auto newRse = FB_NEW_POOL(*tdbb->getDefaultPool()) + RseNode(*tdbb->getDefaultPool()); + + newRse->rse_relations.add(rse); + newRse->rse_relations.add(rseStack.pop()); + + newRse->rse_jointype = blr_inner; + newRse->rse_boolean = booleanStack.pop(); + + rse = newRse; + } + + if (first || skip) + { + const auto newRse = FB_NEW_POOL(*tdbb->getDefaultPool()) + RseNode(*tdbb->getDefaultPool()); + + newRse->rse_relations.add(rse); + newRse->rse_jointype = blr_inner; + newRse->rse_first = first; + newRse->rse_skip = skip; + + rse = newRse; + } + + rse->flags = orgFlags; + + return rse; +} + // Check that all streams in the RseNode have a plan specified for them. // If they are not, there are streams in the RseNode which were not mentioned in the plan. 
void RseNode::planCheck(const CompilerScratch* csb) const diff --git a/src/jrd/RecordSourceNodes.h b/src/jrd/RecordSourceNodes.h index ce4f7c0d51d..1bf53028570 100644 --- a/src/jrd/RecordSourceNodes.h +++ b/src/jrd/RecordSourceNodes.h @@ -718,14 +718,15 @@ class RseNode final : public TypedNode dsqlFirst; diff --git a/src/jrd/SysFunction.cpp b/src/jrd/SysFunction.cpp index 553f5de69bc..0347d9b8098 100644 --- a/src/jrd/SysFunction.cpp +++ b/src/jrd/SysFunction.cpp @@ -6969,7 +6969,7 @@ const SysFunction SysFunction::functions[] = {"UNICODE_CHAR", 1, 1, true, setParamsInteger, makeUnicodeChar, evlUnicodeChar, NULL}, {"UNICODE_VAL", 1, 1, true, setParamsUnicodeVal, makeLongResult, evlUnicodeVal, NULL}, {"UUID_TO_CHAR", 1, 1, true, setParamsUuidToChar, makeUuidToChar, evlUuidToChar, NULL}, - {"", 0, 0, NULL, NULL, NULL, NULL} + {"", 0, 0, false, NULL, NULL, NULL, NULL} }; diff --git a/src/jrd/optimizer/InnerJoin.cpp b/src/jrd/optimizer/InnerJoin.cpp index afd26c60cb1..a49e0ae3f31 100644 --- a/src/jrd/optimizer/InnerJoin.cpp +++ b/src/jrd/optimizer/InnerJoin.cpp @@ -108,6 +108,7 @@ void InnerJoin::calculateStreamInfo() innerStream->baseIndexes = candidate->indexes; innerStream->baseUnique = candidate->unique; innerStream->baseNavigated = candidate->navigated; + innerStream->baseConjuncts = candidate->conjuncts; csb->csb_rpt[innerStream->number].deactivate(); } @@ -579,13 +580,39 @@ River* InnerJoin::formRiver() // Create a hash join rsb = FB_NEW_POOL(getPool()) - HashJoin(tdbb, csb, 2, hashJoinRsbs, keys.begin(), stream.selectivity); + HashJoin(tdbb, csb, INNER_JOIN, 2, hashJoinRsbs, keys.begin(), stream.selectivity); // Clear priorly processed rsb's, as they're already incorporated into a hash join rsbs.clear(); } else + { + StreamList depStreams; + + if (optimizer->isSemiJoined() && rsbs.isEmpty()) + { + const auto baseStream = getStreamInfo(stream.number); + for (const auto boolean : baseStream->baseConjuncts) + { + if (optimizer->checkEquiJoin(boolean)) + { + 
SortedStreamList nodeStreams; + boolean->collectStreams(nodeStreams); + + for (const auto stream : nodeStreams) + { + if (stream != baseStream->number && !depStreams.exist(stream)) + depStreams.add(stream); + } + } + } + } + + StreamStateHolder stateHolder(csb, depStreams); + stateHolder.deactivate(); + rsb = optimizer->generateRetrieval(stream.number, sortPtr, false, false); + } rsbs.add(rsb); streams.add(stream.number); diff --git a/src/jrd/optimizer/Optimizer.cpp b/src/jrd/optimizer/Optimizer.cpp index 2932ebad963..ecda7320152 100644 --- a/src/jrd/optimizer/Optimizer.cpp +++ b/src/jrd/optimizer/Optimizer.cpp @@ -168,9 +168,14 @@ namespace class CrossJoin : public River { public: - CrossJoin(CompilerScratch* csb, RiverList& rivers) - : River(csb, nullptr, rivers) + CrossJoin(Optimizer* opt, RiverList& rivers, JoinType joinType) + : River(opt->getCompilerScratch(), nullptr, rivers) { + fb_assert(joinType != OUTER_JOIN); + + const auto csb = opt->getCompilerScratch(); + Optimizer::ConjunctIterator iter(opt->getBaseConjuncts()); + // Save states of the underlying streams and restore them afterwards StreamStateHolder stateHolder(csb, m_streams); @@ -182,57 +187,80 @@ namespace if (riverCount == 1) { - River* const sub_river = rivers.pop(); - m_rsb = sub_river->getRecordSource(); + const auto subRiver = rivers.pop(); + const auto subRsb = subRiver->getRecordSource(); + subRiver->activate(csb); + m_rsb = opt->applyBoolean(subRsb, iter); } else { HalfStaticArray rsbs(riverCount); - // Reorder input rivers according to their possible inter-dependencies - - while (rivers.hasData()) + if (joinType == INNER_JOIN) { - const auto orgCount = rsbs.getCount(); + // Reorder input rivers according to their possible inter-dependencies - for (auto& subRiver : rivers) + while (rivers.hasData()) { - const auto subRsb = subRiver->getRecordSource(); - fb_assert(!rsbs.exist(subRsb)); + const auto orgCount = rsbs.getCount(); - subRiver->activate(csb); - - if (subRiver->isComputable(csb)) 
+ for (auto& subRiver : rivers) { - rsbs.add(subRsb); - rivers.remove(&subRiver); - break; + auto subRsb = subRiver->getRecordSource(); + + subRiver->activate(csb); + subRsb = opt->applyBoolean(subRsb, iter); + + if (subRiver->isComputable(csb)) + { + rsbs.add(subRsb); + rivers.remove(&subRiver); + break; + } + + subRiver->deactivate(csb); } - subRiver->deactivate(csb); + if (rsbs.getCount() == orgCount) + break; } - if (rsbs.getCount() == orgCount) - break; - } + if (rivers.hasData()) + { + // Ideally, we should never get here. But just in case it happened, handle it. - if (rivers.hasData()) - { - // Ideally, we should never get here. But just in case it happened, handle it. + for (auto& subRiver : rivers) + { + auto subRsb = subRiver->getRecordSource(); - for (auto& subRiver : rivers) + subRiver->activate(csb); + subRsb = opt->applyBoolean(subRsb, iter); + + const auto pos = &subRiver - rivers.begin(); + rsbs.insert(pos, subRsb); + } + + rivers.clear(); + } + } + else + { + for (const auto subRiver : rivers) { - const auto subRsb = subRiver->getRecordSource(); - fb_assert(!rsbs.exist(subRsb)); + auto subRsb = subRiver->getRecordSource(); + + subRiver->activate(csb); + if (subRiver != rivers.front()) + subRsb = opt->applyBoolean(subRsb, iter); - const auto pos = &subRiver - rivers.begin(); - rsbs.insert(pos, subRsb); + rsbs.add(subRsb); } rivers.clear(); } - m_rsb = FB_NEW_POOL(csb->csb_pool) NestedLoopJoin(csb, rsbs.getCount(), rsbs.begin()); + m_rsb = FB_NEW_POOL(csb->csb_pool) + NestedLoopJoin(csb, rsbs.getCount(), rsbs.begin(), joinType); } } }; @@ -267,7 +295,6 @@ namespace } } - unsigned getRiverCount(unsigned count, const ValueExprNode* const* eq_class) { // Given an sort/merge join equivalence class (vector of node pointers @@ -672,7 +699,7 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) // AB: If we have limit our retrieval with FIRST / SKIP syntax then // we may not deliver above conditions (from higher rse's) to this // rse, because 
the results should be consistent. - if (rse->rse_skip || rse->rse_first) + if (rse->rse_skip || rse->rse_first || isSemiJoined()) parentStack = nullptr; // Set base-point before the parent/distributed nodes begin. @@ -814,7 +841,7 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) // Go through the record selection expression generating // record source blocks for all streams - RiverList rivers, dependentRivers; + RiverList rivers, dependentRivers, specialRivers; bool innerSubStream = false; for (auto node : rse->rse_relations) @@ -822,6 +849,11 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) fb_assert(sort == rse->rse_sorted); fb_assert(aggregate == rse->rse_aggregate); + const auto subRse = nodeAs(node); + + const bool semiJoin = (subRse && subRse->isSemiJoined()); + fb_assert(!semiJoin || rse->rse_jointype == blr_inner); + // Find the stream number and place it at the end of the bedStreams array // (if this is really a stream and not another RseNode) @@ -860,7 +892,11 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) if (computable) { outerStreams.join(localStreams); - rivers.add(river); + + if (semiJoin) + specialRivers.add(river); + else + rivers.add(river); } else { @@ -869,6 +905,7 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) } else { + fb_assert(!semiJoin); // We have a relation, just add its stream fb_assert(bedStreams.hasData()); outerStreams.add(bedStreams.back()); @@ -883,11 +920,6 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) if (compileStreams.getCount() > 5) CCH_expand(tdbb, (ULONG) (compileStreams.getCount() * CACHE_PAGES_PER_STREAM)); - // At this point we are ready to start optimizing. - // We will use the opt block to hold information of - // a global nature, meaning that it needs to stick - // around for the rest of the optimization process. 
- // Attempt to optimize aggregates via an index, if possible if (aggregate && !sort) sort = aggregate; @@ -921,6 +953,8 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) } else { + JoinType joinType = INNER_JOIN; + // AB: If previous rsb's are already on the stack we can't use // a navigational-retrieval for an ORDER BY because the next // streams are JOINed to the previous ones @@ -931,7 +965,7 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) // AB: We could already have multiple rivers at this // point so try to do some hashing or sort/merging now. - while (generateEquiJoin(rivers)) + while (generateEquiJoin(rivers, joinType)) ; } @@ -968,7 +1002,7 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) // Generate one river which holds a cross join rsb between // all currently available rivers - rivers.add(FB_NEW_POOL(getPool()) CrossJoin(csb, rivers)); + rivers.add(FB_NEW_POOL(getPool()) CrossJoin(this, rivers, joinType)); rivers.back()->activate(csb); } else @@ -988,16 +1022,48 @@ RecordSource* Optimizer::compile(BoolExprNodeStack* parentStack) // Attempt to form joins in decreasing order of desirability generateInnerJoin(joinStreams, rivers, &sort, rse->rse_plan); - // Re-activate remaining rivers to be hashable/mergeable - for (const auto river : rivers) - river->activate(csb); + if (rivers.isEmpty() && dependentRivers.isEmpty()) + { + // This case may look weird, but it's possible for recursive unions + rsb = FB_NEW_POOL(csb->csb_pool) NestedLoopJoin(csb, 0, nullptr, joinType); + } + else + { + while (rivers.hasData() || dependentRivers.hasData()) + { + // Re-activate remaining rivers to be hashable/mergeable + for (const auto river : rivers) + river->activate(csb); - // If there are multiple rivers, try some hashing or sort/merging - while (generateEquiJoin(rivers)) - ; + // If there are multiple rivers, try some hashing or sort/merging + while (generateEquiJoin(rivers, joinType)) + ; - 
rivers.join(dependentRivers); - rsb = CrossJoin(csb, rivers).getRecordSource(); + if (dependentRivers.hasData()) + { + fb_assert(joinType == INNER_JOIN); + + rivers.join(dependentRivers); + dependentRivers.clear(); + } + + const auto finalRiver = FB_NEW_POOL(getPool()) CrossJoin(this, rivers, joinType); + fb_assert(rivers.isEmpty()); + rsb = finalRiver->getRecordSource(); + + if (specialRivers.hasData()) + { + fb_assert(joinType == INNER_JOIN); + joinType = SEMI_JOIN; + + rivers.add(finalRiver); + rivers.join(specialRivers); + specialRivers.clear(); + } + } + } + + fb_assert(rsb); // Pick up any residual boolean that may have fallen thru the cracks rsb = applyResidualBoolean(rsb); @@ -2281,16 +2347,38 @@ void Optimizer::formRivers(const StreamList& streams, // If the whole things is a moby no-op, return false. // -bool Optimizer::generateEquiJoin(RiverList& orgRivers) +bool Optimizer::generateEquiJoin(RiverList& rivers, JoinType joinType) { + fb_assert(joinType != OUTER_JOIN); + ULONG selected_rivers[OPT_STREAM_BITS], selected_rivers2[OPT_STREAM_BITS]; ValueExprNode** eq_class; + RiverList orgRivers(rivers); + + // Find dependent rivers and exclude them from processing + + for (River** iter = orgRivers.begin(); iter < orgRivers.end();) + { + const auto river = *iter; + + StreamStateHolder stateHolder2(csb, river->getStreams()); + stateHolder2.activate(); + + if (river->isComputable(csb)) + { + iter++; + continue; + } + + orgRivers.remove(iter); + } + // Count the number of "rivers" involved in the operation, then allocate // a scratch block large enough to hold values to compute equality // classes. 
- const unsigned orgCount = (unsigned) orgRivers.getCount(); + const auto orgCount = (unsigned) orgRivers.getCount(); if (orgCount < 2) return false; @@ -2397,7 +2485,7 @@ bool Optimizer::generateEquiJoin(RiverList& orgRivers) // Prepare rivers for joining StreamList streams; - RiverList rivers; + RiverList joinedRivers; HalfStaticArray keys; unsigned position = 0, maxCardinalityPosition = 0, lowestPosition = MAX_ULONG; double maxCardinality1 = 0, maxCardinality2 = 0; @@ -2426,13 +2514,13 @@ bool Optimizer::generateEquiJoin(RiverList& orgRivers) { maxCardinality2 = maxCardinality1; maxCardinality1 = cardinality; - maxCardinalityPosition = rivers.getCount(); + maxCardinalityPosition = joinedRivers.getCount(); } else if (cardinality > maxCardinality2) maxCardinality2 = cardinality; streams.join(river->getStreams()); - rivers.add(river); + joinedRivers.add(river); orgRivers.remove(iter); // Collect keys to join on @@ -2455,10 +2543,11 @@ bool Optimizer::generateEquiJoin(RiverList& orgRivers) HalfStaticArray rsbs; RecordSource* finalRsb = nullptr; - if (useMergeJoin) + // MERGE JOIN does not support other join types yet + if (useMergeJoin && joinType == INNER_JOIN) { position = 0; - for (const auto river : rivers) + for (const auto river : joinedRivers) { const auto sort = FB_NEW_POOL(getPool()) SortNode(getPool()); @@ -2482,29 +2571,36 @@ bool Optimizer::generateEquiJoin(RiverList& orgRivers) } else { - // Ensure that the largest river is placed at the first position. - // It's important for a hash join to be efficient. + if (joinType == INNER_JOIN) + { + // Ensure that the largest river is placed at the first position. + // It's important for a hash join to be efficient. 
- const auto maxCardinalityRiver = rivers[maxCardinalityPosition]; - rivers[maxCardinalityPosition] = rivers[0]; - rivers[0] = maxCardinalityRiver; + const auto maxCardinalityRiver = joinedRivers[maxCardinalityPosition]; + joinedRivers[maxCardinalityPosition] = joinedRivers[0]; + joinedRivers[0] = maxCardinalityRiver; - const auto maxCardinalityKey = keys[maxCardinalityPosition]; - keys[maxCardinalityPosition] = keys[0]; - keys[0] = maxCardinalityKey; + const auto maxCardinalityKey = keys[maxCardinalityPosition]; + keys[maxCardinalityPosition] = keys[0]; + keys[0] = maxCardinalityKey; + } - for (const auto river : rivers) + for (const auto river : joinedRivers) rsbs.add(river->getRecordSource()); finalRsb = FB_NEW_POOL(getPool()) - HashJoin(tdbb, csb, rsbs.getCount(), rsbs.begin(), keys.begin()); + HashJoin(tdbb, csb, joinType, rsbs.getCount(), rsbs.begin(), keys.begin()); } // Pick up any boolean that may apply finalRsb = applyLocalBoolean(finalRsb, streams, iter); - const auto finalRiver = FB_NEW_POOL(getPool()) River(csb, finalRsb, rivers); - orgRivers.insert(lowestPosition, finalRiver); + const auto finalRiver = FB_NEW_POOL(getPool()) River(csb, finalRsb, joinedRivers); + + for (const auto river : joinedRivers) + rivers.findAndRemove(river); + + rivers.insert(lowestPosition, finalRiver); return true; } @@ -2736,6 +2832,20 @@ RecordSource* Optimizer::generateRetrieval(StreamType stream, } +// +// Compose a filter including all computable booleans +// + +RecordSource* Optimizer::applyBoolean(RecordSource* rsb, ConjunctIterator& iter) +{ + double selectivity = MAXIMUM_SELECTIVITY; + if (const auto boolean = composeBoolean(iter, &selectivity)) + rsb = FB_NEW_POOL(getPool()) FilteredStream(csb, rsb, boolean, selectivity); + + return rsb; +} + + // // Find conjuncts local to the given river and compose an appropriate filter // @@ -2750,11 +2860,7 @@ RecordSource* Optimizer::applyLocalBoolean(RecordSource* rsb, StreamStateHolder localHolder(csb, streams); 
localHolder.activate(csb); - double selectivity = MAXIMUM_SELECTIVITY; - if (const auto boolean = composeBoolean(iter, &selectivity)) - rsb = FB_NEW_POOL(getPool()) FilteredStream(csb, rsb, boolean, selectivity); - - return rsb; + return applyBoolean(rsb, iter); } diff --git a/src/jrd/optimizer/Optimizer.h b/src/jrd/optimizer/Optimizer.h index fc12bd4dd56..8712eb62ba4 100644 --- a/src/jrd/optimizer/Optimizer.h +++ b/src/jrd/optimizer/Optimizer.h @@ -499,6 +499,7 @@ class Optimizer : public Firebird::PermanentStorage return firstRows; } + RecordSource* applyBoolean(RecordSource* rsb, ConjunctIterator& iter); RecordSource* applyLocalBoolean(RecordSource* rsb, const StreamList& streams, ConjunctIterator& iter); @@ -513,6 +514,11 @@ class Optimizer : public Firebird::PermanentStorage return composeBoolean(iter, selectivity); } + bool isSemiJoined() const + { + return (rse->flags & RseNode::FLAG_SEMI_JOINED) != 0; + } + bool checkEquiJoin(BoolExprNode* boolean); bool getEquiJoinKeys(BoolExprNode* boolean, NestConst* node1, @@ -537,7 +543,7 @@ class Optimizer : public Firebird::PermanentStorage RiverList& rivers, SortNode** sortClause, const PlanNode* planClause); - bool generateEquiJoin(RiverList& org_rivers); + bool generateEquiJoin(RiverList& rivers, JoinType joinType = INNER_JOIN); void generateInnerJoin(StreamList& streams, RiverList& rivers, SortNode** sortClause, @@ -582,7 +588,7 @@ enum segmentScanType { segmentScanList }; -typedef Firebird::HalfStaticArray MatchedBooleanList; +typedef Firebird::HalfStaticArray BooleanList; struct IndexScratchSegment { @@ -610,7 +616,7 @@ struct IndexScratchSegment segmentScanType scanType = segmentScanNone; // scan type SSHORT scale = 0; // scale for SINT64/Int128-based segment of index - MatchedBooleanList matches; // matched booleans + BooleanList matches; // matched booleans }; struct IndexScratch @@ -631,7 +637,7 @@ struct IndexScratch bool useRootListScan = false; Firebird::ObjectsArray segments; - MatchedBooleanList 
matches; // matched booleans (partial indices only) + BooleanList matches; // matched booleans (partial indices only) }; typedef Firebird::ObjectsArray IndexScratchList; @@ -643,7 +649,7 @@ typedef Firebird::ObjectsArray IndexScratchList; struct InversionCandidate { explicit InversionCandidate(MemoryPool& p) - : matches(p), dbkeyRanges(p), dependentFromStreams(p) + : conjuncts(p), matches(p), dbkeyRanges(p), dependentFromStreams(p) {} double selectivity = MAXIMUM_SELECTIVITY; @@ -660,7 +666,8 @@ struct InversionCandidate bool unique = false; bool navigated = false; - MatchedBooleanList matches; + BooleanList conjuncts; // booleans referring our stream + BooleanList matches; // booleans matched to any index Firebird::Array dbkeyRanges; SortedStreamList dependentFromStreams; }; @@ -691,7 +698,7 @@ class Retrieval : private Firebird::PermanentStorage void analyzeNavigation(const InversionCandidateList& inversions); bool betterInversion(const InversionCandidate* inv1, const InversionCandidate* inv2, bool navigation) const; - bool checkIndexCondition(index_desc& idx, MatchedBooleanList& matches) const; + bool checkIndexCondition(index_desc& idx, BooleanList& matches) const; bool checkIndexExpression(const index_desc* idx, ValueExprNode* node) const; InversionNode* composeInversion(InversionNode* node1, InversionNode* node2, InversionNode::Type node_type) const; @@ -791,7 +798,7 @@ class InnerJoin : private Firebird::PermanentStorage { public: StreamInfo(MemoryPool& p, StreamType num) - : number(num), indexedRelationships(p) + : number(num), baseConjuncts(p), indexedRelationships(p) {} bool isIndependent() const @@ -838,6 +845,7 @@ class InnerJoin : private Firebird::PermanentStorage bool used = false; unsigned previousExpectedStreams = 0; + BooleanList baseConjuncts; IndexedRelationships indexedRelationships; }; @@ -922,7 +930,7 @@ class OuterJoin : private Firebird::PermanentStorage RecordSource* generate(); private: - RecordSource* process(const JoinType joinType); + 
RecordSource* process(); thread_db* const tdbb; Optimizer* const optimizer; diff --git a/src/jrd/optimizer/OuterJoin.cpp b/src/jrd/optimizer/OuterJoin.cpp index 343ac1605f1..d009fae3a51 100644 --- a/src/jrd/optimizer/OuterJoin.cpp +++ b/src/jrd/optimizer/OuterJoin.cpp @@ -94,7 +94,7 @@ OuterJoin::OuterJoin(thread_db* aTdbb, Optimizer* opt, RecordSource* OuterJoin::generate() { - const auto outerJoinRsb = process(OUTER_JOIN); + const auto outerJoinRsb = process(); if (!optimizer->isFullJoin()) return outerJoinRsb; @@ -107,6 +107,11 @@ RecordSource* OuterJoin::generate() auto& outerStream = joinStreams[0]; auto& innerStream = joinStreams[1]; + // Collect the outer streams to be used in the full outer join algorithm + + StreamList checkStreams; + outerStream.rsb->findUsedStreams(checkStreams); + std::swap(outerStream, innerStream); // Reset both streams to their original states @@ -131,15 +136,13 @@ RecordSource* OuterJoin::generate() iter.reset(CMP_clone_node_opt(tdbb, csb, iter)); } - const auto antiJoinRsb = process(ANTI_JOIN); - // Allocate and return the final join record source - return FB_NEW_POOL(getPool()) FullOuterJoin(csb, outerJoinRsb, antiJoinRsb); + return FB_NEW_POOL(getPool()) FullOuterJoin(csb, outerJoinRsb, process(), checkStreams); } -RecordSource* OuterJoin::process(const JoinType joinType) +RecordSource* OuterJoin::process() { BoolExprNode* boolean = nullptr; @@ -178,8 +181,7 @@ RecordSource* OuterJoin::process(const JoinType joinType) fb_assert(!innerStream.rsb); // AB: the sort clause for the inner stream of an OUTER JOIN // should never be used for the index retrieval - innerStream.rsb = optimizer->generateRetrieval(innerStream.number, nullptr, - false, (joinType == OUTER_JOIN) ? 
true : false); + innerStream.rsb = optimizer->generateRetrieval(innerStream.number, nullptr, false, true); } // Generate a parent filter record source for any remaining booleans that @@ -189,8 +191,7 @@ RecordSource* OuterJoin::process(const JoinType joinType) // Allocate and return the join record source - return FB_NEW_POOL(getPool()) - NestedLoopJoin(csb, outerStream.rsb, innerRsb, boolean, joinType); + return FB_NEW_POOL(getPool()) NestedLoopJoin(csb, outerStream.rsb, innerRsb, boolean); }; diff --git a/src/jrd/optimizer/Retrieval.cpp b/src/jrd/optimizer/Retrieval.cpp index 22e38893064..ab9b718d814 100644 --- a/src/jrd/optimizer/Retrieval.cpp +++ b/src/jrd/optimizer/Retrieval.cpp @@ -162,7 +162,7 @@ Retrieval::Retrieval(thread_db* aTdbb, Optimizer* opt, StreamType streamNumber, if (!tail->csb_idx) return; - MatchedBooleanList matches; + BooleanList matches; for (auto& index : *tail->csb_idx) { @@ -346,6 +346,12 @@ InversionCandidate* Retrieval::getInversion() { selectivity *= Optimizer::getSelectivity(*iter); } + + if (iter->computable(csb, INVALID_STREAM, false) && + iter->containsStream(stream)) + { + invCandidate->conjuncts.add(*iter); + } } } @@ -800,7 +806,7 @@ bool Retrieval::betterInversion(const InversionCandidate* inv1, return false; } -bool Retrieval::checkIndexCondition(index_desc& idx, MatchedBooleanList& matches) const +bool Retrieval::checkIndexCondition(index_desc& idx, BooleanList& matches) const { fb_assert(idx.idx_condition); @@ -911,7 +917,7 @@ void Retrieval::getInversionCandidates(InversionCandidateList& inversions, const double minSelectivity = MIN(MAXIMUM_SELECTIVITY / cardinality, DEFAULT_SELECTIVITY); // Walk through indexes to calculate selectivity / candidate - MatchedBooleanList matches; + BooleanList matches; for (auto& scratch : fromIndexScratches) { @@ -1428,7 +1434,7 @@ InversionCandidate* Retrieval::makeInversion(InversionCandidateList& inversions) } } - MatchedBooleanList matches; + BooleanList matches; if (navigationCandidate) 
{ diff --git a/src/jrd/recsrc/FullOuterJoin.cpp b/src/jrd/recsrc/FullOuterJoin.cpp index 1578b1100b3..07e5769323e 100644 --- a/src/jrd/recsrc/FullOuterJoin.cpp +++ b/src/jrd/recsrc/FullOuterJoin.cpp @@ -37,10 +37,13 @@ using namespace Jrd; // Data access: full outer join // ---------------------------- -FullOuterJoin::FullOuterJoin(CompilerScratch* csb, RecordSource* arg1, RecordSource* arg2) +FullOuterJoin::FullOuterJoin(CompilerScratch* csb, + RecordSource* arg1, RecordSource* arg2, + const StreamList& checkStreams) : RecordSource(csb), m_arg1(arg1), - m_arg2(arg2) + m_arg2(arg2), + m_checkStreams(csb->csb_pool, checkStreams) { fb_assert(m_arg1 && m_arg2); @@ -97,7 +100,27 @@ bool FullOuterJoin::internalGetRecord(thread_db* tdbb) const m_arg2->open(tdbb); } - return m_arg2->getRecord(tdbb); + // We should exclude matching records from the right-joined (second) record source, + // as they're already returned from the left-joined (first) record source + + while (m_arg2->getRecord(tdbb)) + { + bool matched = false; + + for (const auto i : m_checkStreams) + { + if (request->req_rpb[i].rpb_number.isValid()) + { + matched = true; + break; + } + } + + if (!matched) + return true; + } + + return false; } bool FullOuterJoin::refetchRecord(thread_db* /*tdbb*/) const diff --git a/src/jrd/recsrc/HashJoin.cpp b/src/jrd/recsrc/HashJoin.cpp index ebca83781b5..5535b47ac1f 100644 --- a/src/jrd/recsrc/HashJoin.cpp +++ b/src/jrd/recsrc/HashJoin.cpp @@ -37,13 +37,15 @@ using namespace Firebird; using namespace Jrd; +//#define PRINT_HASH_TABLE + // ---------------------- // Data access: hash join // ---------------------- // NS: FIXME - Why use static hash table here??? 
Hash table shall support dynamic resizing static const ULONG HASH_SIZE = 1009; -static const ULONG BUCKET_PREALLOCATE_SIZE = 32; // 256 bytes per slot +static const ULONG BUCKET_PREALLOCATE_SIZE = 32; // 256 bytes per bucket unsigned HashJoin::maxCapacity() { @@ -92,6 +94,11 @@ class HashJoin::HashTable : public PermanentStorage m_collisions.sort(); } + ULONG getCount() const + { + return (ULONG) m_collisions.getCount(); + } + void add(ULONG hash, ULONG position) { m_collisions.add(Entry(hash, position)); @@ -200,13 +207,38 @@ class HashJoin::HashTable : public PermanentStorage void sort() { + for (ULONG i = 0; i < m_streamCount * m_tableSize; i++) + { + if (const auto collisions = m_collisions[i]) + collisions->sort(); + } + +#ifdef PRINT_HASH_TABLE + FB_UINT64 total = 0; + ULONG min = MAX_ULONG, max = 0, count = 0; + for (ULONG i = 0; i < m_streamCount * m_tableSize; i++) { CollisionList* const collisions = m_collisions[i]; + if (!collisions) + continue; - if (collisions) - collisions->sort(); + const auto cnt = collisions->getCount(); + + if (cnt < min) + min = cnt; + if (cnt > max) + max = cnt; + total += cnt; + count++; } + + if (count) + { + printf("Hash table size %u, count %u, buckets %u, min %u, max %u, avg %u\n", + m_tableSize, (ULONG) total, count, min, max, (ULONG) (total / count)); + } +#endif } private: @@ -217,14 +249,35 @@ class HashJoin::HashTable : public PermanentStorage }; -HashJoin::HashJoin(thread_db* tdbb, CompilerScratch* csb, FB_SIZE_T count, - RecordSource* const* args, NestValueArray* const* keys, +HashJoin::HashJoin(thread_db* tdbb, CompilerScratch* csb, JoinType joinType, + FB_SIZE_T count, RecordSource* const* args, NestValueArray* const* keys, double selectivity) : RecordSource(csb), + m_joinType(joinType), + m_boolean(nullptr), m_args(csb->csb_pool, count - 1) { fb_assert(count >= 2); + init(tdbb, csb, count, args, keys, selectivity); +} + +HashJoin::HashJoin(thread_db* tdbb, CompilerScratch* csb, + BoolExprNode* boolean, + 
RecordSource* const* args, NestValueArray* const* keys, + double selectivity) + : RecordSource(csb), + m_joinType(OUTER_JOIN), + m_boolean(boolean), + m_args(csb->csb_pool, 1) +{ + init(tdbb, csb, 2, args, keys, selectivity); +} + +void HashJoin::init(thread_db* tdbb, CompilerScratch* csb, FB_SIZE_T count, + RecordSource* const* args, NestValueArray* const* keys, + double selectivity) +{ m_impure = csb->allocImpure(); m_leader.source = args[0]; @@ -360,6 +413,8 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const if (!(impure->irsb_flags & irsb_open)) return false; + const auto inner = m_args.front().source; + while (true) { if (impure->irsb_flags & irsb_mustread) @@ -369,6 +424,14 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const if (!m_leader.source->getRecord(tdbb)) return false; + if (m_boolean && !m_boolean->execute(tdbb, request)) + { + // The boolean pertaining to the left sub-stream is false + // so just join sub-stream to a null valued right sub-stream + inner->nullRecords(tdbb); + return true; + } + // We have something to join with, so ensure the hash table is initialized if (!impure->irsb_hash_table && !impure->irsb_leader_buffer) @@ -410,7 +473,15 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const // Setup the hash table for the iteration through collisions. 
if (!impure->irsb_hash_table->setup(impure->irsb_leader_hash)) - continue; + { + if (m_joinType == INNER_JOIN || m_joinType == SEMI_JOIN) + continue; + + if (m_joinType == OUTER_JOIN) + inner->nullRecords(tdbb); + + return true; + } impure->irsb_flags &= ~irsb_mustread; impure->irsb_flags |= irsb_first; @@ -434,13 +505,29 @@ bool HashJoin::internalGetRecord(thread_db* tdbb) const if (!found) { impure->irsb_flags |= irsb_mustread; - continue; + + if (m_joinType == INNER_JOIN || m_joinType == SEMI_JOIN) + continue; + + if (m_joinType == OUTER_JOIN) + inner->nullRecords(tdbb); + + break; + } + + if (m_joinType == SEMI_JOIN || m_joinType == ANTI_JOIN) + { + impure->irsb_flags |= irsb_mustread; + + if (m_joinType == ANTI_JOIN) + continue; } impure->irsb_flags &= ~irsb_first; } else if (!fetchRecord(tdbb, impure, m_args.getCount() - 1)) { + fb_assert(m_joinType == INNER_JOIN || m_joinType == OUTER_JOIN); impure->irsb_flags |= irsb_mustread; continue; } @@ -481,11 +568,36 @@ void HashJoin::internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsigned l { planEntry.className = "HashJoin"; + planEntry.lines.add().text = "Hash Join "; + + switch (m_joinType) + { + case INNER_JOIN: + planEntry.lines.back().text += "(inner)"; + break; + + case OUTER_JOIN: + planEntry.lines.back().text += "(outer)"; + break; + + case SEMI_JOIN: + planEntry.lines.back().text += "(semi)"; + break; + + case ANTI_JOIN: + planEntry.lines.back().text += "(anti)"; + break; + + default: + fb_assert(false); + } + string extras; extras.printf(" (keys: %" ULONGFORMAT", total key length: %" ULONGFORMAT")", m_leader.keys->getCount(), m_leader.totalKeyLength); - planEntry.lines.add().text = "Hash Join (inner)" + extras; + planEntry.lines.back().text += extras; + printOptInfo(planEntry.lines); if (recurse) @@ -625,6 +737,9 @@ bool HashJoin::fetchRecord(thread_db* tdbb, Impure* impure, FB_SIZE_T stream) co return true; } + if (m_joinType == SEMI_JOIN || m_joinType == ANTI_JOIN) + return false; + while (true) { if (stream == 0 ||
!fetchRecord(tdbb, impure, stream - 1)) diff --git a/src/jrd/recsrc/NestedLoopJoin.cpp b/src/jrd/recsrc/NestedLoopJoin.cpp index 2f891128070..12cc14c3955 100644 --- a/src/jrd/recsrc/NestedLoopJoin.cpp +++ b/src/jrd/recsrc/NestedLoopJoin.cpp @@ -35,30 +35,34 @@ using namespace Jrd; // Data access: nested loops join // ------------------------------ -NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, FB_SIZE_T count, RecordSource* const* args) +NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, + FB_SIZE_T count, + RecordSource* const* args, + JoinType joinType) : RecordSource(csb), - m_joinType(INNER_JOIN), - m_args(csb->csb_pool), - m_boolean(NULL) + m_joinType(joinType), + m_boolean(nullptr), + m_args(csb->csb_pool, count) { m_impure = csb->allocImpure(); m_cardinality = MINIMUM_CARDINALITY; - m_args.resize(count); - for (FB_SIZE_T i = 0; i < count; i++) { - m_args[i] = args[i]; + m_args.add(args[i]); m_cardinality *= args[i]->getCardinality(); } } -NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, RecordSource* outer, RecordSource* inner, - BoolExprNode* boolean, JoinType joinType) +NestedLoopJoin::NestedLoopJoin(CompilerScratch* csb, + RecordSource* outer, + RecordSource* inner, + BoolExprNode* boolean, + JoinType joinType) : RecordSource(csb), m_joinType(joinType), - m_args(csb->csb_pool), - m_boolean(boolean) + m_boolean(boolean), + m_args(csb->csb_pool, 2) { fb_assert(outer && inner); @@ -90,8 +94,8 @@ void NestedLoopJoin::close(thread_db* tdbb) const { impure->irsb_flags &= ~irsb_open; - for (FB_SIZE_T i = 0; i < m_args.getCount(); i++) - m_args[i]->close(tdbb); + for (const auto arg : m_args) + arg->close(tdbb); } } @@ -127,12 +131,70 @@ bool NestedLoopJoin::internalGetRecord(thread_db* tdbb) const else if (!fetchRecord(tdbb, m_args.getCount() - 1)) return false; } + else if (m_joinType == SEMI_JOIN || m_joinType == ANTI_JOIN) + { + const auto outer = m_args[0]; + + if (impure->irsb_flags & irsb_first) + { + outer->open(tdbb); + + impure->irsb_flags 
&= ~irsb_first; + } + + while (true) + { + if (impure->irsb_flags & irsb_joined) + { + for (FB_SIZE_T i = 1; i < m_args.getCount(); i++) + m_args[i]->close(tdbb); + + impure->irsb_flags &= ~irsb_joined; + } + + if (!outer->getRecord(tdbb)) + return false; + + FB_SIZE_T stopArg = 0; + + for (FB_SIZE_T i = 1; i < m_args.getCount(); i++) + { + m_args[i]->open(tdbb); + + if (m_args[i]->getRecord(tdbb)) + { + if (m_joinType == ANTI_JOIN) + { + stopArg = i; + break; + } + } + else + { + if (m_joinType == SEMI_JOIN) + { + stopArg = i; + break; + } + } + } + + if (!stopArg) + break; + + for (FB_SIZE_T i = 1; i <= stopArg; i++) + m_args[i]->close(tdbb); + } + + impure->irsb_flags |= irsb_joined; + } else { + fb_assert(m_joinType == OUTER_JOIN); fb_assert(m_args.getCount() == 2); - const RecordSource* const outer = m_args[0]; - const RecordSource* const inner = m_args[1]; + const auto outer = m_args[0]; + const auto inner = m_args[1]; if (impure->irsb_flags & irsb_first) { @@ -159,27 +221,10 @@ bool NestedLoopJoin::internalGetRecord(thread_db* tdbb) const inner->open(tdbb); } - if (m_joinType == SEMI_JOIN) - { - if (inner->getRecord(tdbb)) - impure->irsb_flags &= ~irsb_joined; - else - impure->irsb_flags |= irsb_joined; - } - else if (m_joinType == ANTI_JOIN) + if (inner->getRecord(tdbb)) { - if (inner->getRecord(tdbb)) - impure->irsb_flags |= irsb_joined; - else - impure->irsb_flags &= ~irsb_joined; - } - else - { - if (inner->getRecord(tdbb)) - { - impure->irsb_flags |= irsb_joined; - return true; - } + impure->irsb_flags |= irsb_joined; + return true; } inner->close(tdbb); @@ -266,26 +311,26 @@ void NestedLoopJoin::internalGetPlan(thread_db* tdbb, PlanEntry& planEntry, unsi void NestedLoopJoin::markRecursive() { - for (FB_SIZE_T i = 0; i < m_args.getCount(); i++) - m_args[i]->markRecursive(); + for (auto arg : m_args) + arg->markRecursive(); } void NestedLoopJoin::findUsedStreams(StreamList& streams, bool expandAll) const { - for (FB_SIZE_T i = 0; i < m_args.getCount(); 
i++) - m_args[i]->findUsedStreams(streams, expandAll); + for (const auto arg : m_args) + arg->findUsedStreams(streams, expandAll); } void NestedLoopJoin::invalidateRecords(Request* request) const { - for (FB_SIZE_T i = 0; i < m_args.getCount(); i++) - m_args[i]->invalidateRecords(request); + for (const auto arg : m_args) + arg->invalidateRecords(request); } void NestedLoopJoin::nullRecords(thread_db* tdbb) const { - for (FB_SIZE_T i = 0; i < m_args.getCount(); i++) - m_args[i]->nullRecords(tdbb); + for (const auto arg : m_args) + arg->nullRecords(tdbb); } bool NestedLoopJoin::fetchRecord(thread_db* tdbb, FB_SIZE_T n) const diff --git a/src/jrd/recsrc/RecordSource.h b/src/jrd/recsrc/RecordSource.h index ee65daf6f17..a7f1702755c 100644 --- a/src/jrd/recsrc/RecordSource.h +++ b/src/jrd/recsrc/RecordSource.h @@ -1135,9 +1135,10 @@ namespace Jrd class NestedLoopJoin : public RecordSource { public: - NestedLoopJoin(CompilerScratch* csb, FB_SIZE_T count, RecordSource* const* args); + NestedLoopJoin(CompilerScratch* csb, FB_SIZE_T count, RecordSource* const* args, + JoinType joinType = INNER_JOIN); NestedLoopJoin(CompilerScratch* csb, RecordSource* outer, RecordSource* inner, - BoolExprNode* boolean, JoinType joinType); + BoolExprNode* boolean, JoinType joinType = OUTER_JOIN); void close(thread_db* tdbb) const override; @@ -1161,14 +1162,16 @@ namespace Jrd bool fetchRecord(thread_db*, FB_SIZE_T) const; const JoinType m_joinType; + const NestConst m_boolean; + Firebird::Array > m_args; - NestConst const m_boolean; }; class FullOuterJoin : public RecordSource { public: - FullOuterJoin(CompilerScratch* csb, RecordSource* arg1, RecordSource* arg2); + FullOuterJoin(CompilerScratch* csb, RecordSource* arg1, RecordSource* arg2, + const StreamList& checkStreams); void close(thread_db* tdbb) const override; @@ -1191,6 +1194,7 @@ namespace Jrd private: NestConst m_arg1; NestConst m_arg2; + const StreamList m_checkStreams; }; class HashJoin : public RecordSource @@ -1218,7 +1222,11 
@@ namespace Jrd }; public: - HashJoin(thread_db* tdbb, CompilerScratch* csb, FB_SIZE_T count, + HashJoin(thread_db* tdbb, CompilerScratch* csb, JoinType joinType, + FB_SIZE_T count, RecordSource* const* args, NestValueArray* const* keys, + double selectivity = 0); + HashJoin(thread_db* tdbb, CompilerScratch* csb, + BoolExprNode* boolean, RecordSource* const* args, NestValueArray* const* keys, double selectivity = 0); @@ -1243,10 +1251,16 @@ namespace Jrd bool internalGetRecord(thread_db* tdbb) const override; private: + void init(thread_db* tdbb, CompilerScratch* csb, FB_SIZE_T count, + RecordSource* const* args, NestValueArray* const* keys, + double selectivity); ULONG computeHash(thread_db* tdbb, Request* request, const SubStream& sub, UCHAR* buffer) const; bool fetchRecord(thread_db* tdbb, Impure* impure, FB_SIZE_T stream) const; + const JoinType m_joinType; + const NestConst m_boolean; + SubStream m_leader; Firebird::Array m_args; };