From 514b7bbae0685ebc6cb18226729e8220b6dd3ce9 Mon Sep 17 00:00:00 2001 From: HanaPearlman Date: Tue, 16 Dec 2025 11:35:18 -0500 Subject: [PATCH] SERVER-115146: Estimate cardinality of join subsets (#45201) GitOrigin-RevId: a9d039904dea9728bc76b19f6ffabd9aba0667d9 --- .../optimizer/join/cardinality_estimator.cpp | 68 ++++++- .../optimizer/join/cardinality_estimator.h | 25 ++- .../join/cardinality_estimator_join_test.cpp | 179 +++++++++++++++++- .../compiler/optimizer/join/executor.cpp | 10 +- .../optimizer/join/plan_enumerator.cpp | 3 +- .../compiler/optimizer/join/plan_enumerator.h | 6 +- .../optimizer/join/plan_enumerator_test.cpp | 17 +- .../compiler/optimizer/join/reorder_joins.cpp | 3 +- .../compiler/optimizer/join/reorder_joins.h | 2 + 9 files changed, 283 insertions(+), 30 deletions(-) diff --git a/src/mongo/db/query/compiler/optimizer/join/cardinality_estimator.cpp b/src/mongo/db/query/compiler/optimizer/join/cardinality_estimator.cpp index b6c302c2779..bfb0b5e5897 100644 --- a/src/mongo/db/query/compiler/optimizer/join/cardinality_estimator.cpp +++ b/src/mongo/db/query/compiler/optimizer/join/cardinality_estimator.cpp @@ -30,6 +30,7 @@ #include "mongo/db/query/compiler/optimizer/join/cardinality_estimator.h" +#include "mongo/db/query/util/bitset_util.h" #include "mongo/util/assert_util.h" #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQueryCE @@ -43,11 +44,11 @@ JoinCardinalityEstimator::JoinCardinalityEstimator(EdgeSelectivities edgeSelecti JoinCardinalityEstimator JoinCardinalityEstimator::make( const JoinReorderingContext& ctx, - const SingleTableAccessPlansResult& singleTablePlansRes, + const cost_based_ranker::EstimateMap& estimates, const SamplingEstimatorMap& samplingEstimators) { return JoinCardinalityEstimator( JoinCardinalityEstimator::estimateEdgeSelectivities(ctx, samplingEstimators), - JoinCardinalityEstimator::extractNodeCardinalities(ctx, singleTablePlansRes)); + JoinCardinalityEstimator::extractNodeCardinalities(ctx, 
estimates)); } EdgeSelectivities JoinCardinalityEstimator::estimateEdgeSelectivities( @@ -63,13 +64,16 @@ EdgeSelectivities JoinCardinalityEstimator::estimateEdgeSelectivities( } NodeCardinalities JoinCardinalityEstimator::extractNodeCardinalities( - const JoinReorderingContext& ctx, const SingleTableAccessPlansResult& singleTablePlansRes) { + const JoinReorderingContext& ctx, const cost_based_ranker::EstimateMap& estimates) { NodeCardinalities nodeCardinalities; nodeCardinalities.reserve(ctx.joinGraph.numNodes()); for (size_t nodeId = 0; nodeId < ctx.joinGraph.numNodes(); nodeId++) { auto* cq = ctx.joinGraph.accessPathAt(nodeId); - auto cbrRes = singleTablePlansRes.estimate.at(singleTablePlansRes.solns.at(cq)->root()); - nodeCardinalities.push_back(cbrRes.outCE); + auto qsn = ctx.cbrCqQsns.find(cq); + tassert(11514600, "Missing QSN for CanonicalQuery", qsn != ctx.cbrCqQsns.end()); + auto cbrRes = estimates.find(qsn->second->root()); + tassert(11514601, "Missing estimate for QSN root", cbrRes != estimates.end()); + nodeCardinalities.push_back(cbrRes->second.outCE); } return nodeCardinalities; } @@ -166,4 +170,58 @@ cost_based_ranker::SelectivityEstimate JoinCardinalityEstimator::joinPredicateSe "selectivityEstimate"_attr = res); return res; } + +cost_based_ranker::CardinalityEstimate JoinCardinalityEstimator::getOrEstimateSubsetCardinality( + const JoinReorderingContext& ctx, const NodeSet& nodes) { + if (auto it = _subsetCardinalities.find(nodes); it != _subsetCardinalities.end()) { + return it->second; + } + + // This method assumes that all predicates (join and single-table) are independent from each + // other, allowing us to combine them with simple multiplication below. + // + // '_edgeSelectivities' contains edge selectivities: for a given edge connecting tables U and V, it is + // the fraction of rows that are output by the U-V join over the total number of row + // combinations between U and V (|U| * |V|).
The number of rows in the U-V output is by + // definition this selectivity multiplied by |U| and |V|. + // + // We extend this logic to more tables using the independence assumption. For example, given the + // result of the U-V join, assume we are further joining with W through a V-W edge. We treat the + // intermediate result as a single "table" with its own cardinality. We apply the selectivity of + // the V-W edge to estimate how many rows from the intermediate result match rows in W, and + // finally multiply by |W| to account for the number of rows in W that participate in the join. + // + // So far, we have the product of all base table cardinalities with the selectivities of the + // edges in the graph induced by 'nodes'. We must also include the selectivities of single-table + // predicates. By the independence assumption, these can simply be multiplied with the product. + // + // One final complication involves cycles. If all selectivities from edges in a cycle are + // included in the estimate, we will double-count some join predicates. We remove cycles below + // by building a spanning tree (or forest) from the edges considered. + // + // Therefore, this method takes the following steps: Induce a subgraph involving only the nodes + // in 'nodes', and reduce the edges in that subgraph to remove cycles. Then, multiply: + // (1) The selectivities from the reduced edge list. + // (2) The base table cardinalities. + // (3) The single-table predicate selectivities. + // Finally, note that we have the pre-computed combination of (2) and (3) in '_nodeCardinalities'. + cost_based_ranker::CardinalityEstimate ce = cost_based_ranker::oneCE; + for (auto nodeIdx : iterable(nodes, ctx.joinGraph.numNodes())) { + ce = ce * _nodeCardinalities[nodeIdx].toDouble(); + } + // TODO SERVER-115559: Invoke cycle breaker over these edges.
+ std::vector edges = ctx.joinGraph.getEdgesForSubgraph(nodes); + for (const auto& edgeId : edges) { + ce = ce * _edgeSelectivities.at(edgeId); + } + + LOGV2_DEBUG(11514603, + 5, + "Estimating cardinality for subset", + "subset"_attr = nodes.to_string(), + "cardinalityEstimate"_attr = ce); + + _subsetCardinalities.emplace(nodes, ce); + return ce; +} } // namespace mongo::join_ordering diff --git a/src/mongo/db/query/compiler/optimizer/join/cardinality_estimator.h b/src/mongo/db/query/compiler/optimizer/join/cardinality_estimator.h index 0a4ce683f05..91382d466e6 100644 --- a/src/mongo/db/query/compiler/optimizer/join/cardinality_estimator.h +++ b/src/mongo/db/query/compiler/optimizer/join/cardinality_estimator.h @@ -46,6 +46,11 @@ using NodeCardinalities = std::vector; */ using EdgeSelectivities = std::vector; +/** + * Tracks for each JoinSubset (represented by a NodeSet) the estimated cardinality of the join. + */ +using SubsetCardinalities = absl::flat_hash_map; + /** * Contains logic necessary to do selectivity and cardinality estimation for joins. */ @@ -55,7 +60,7 @@ public: NodeCardinalities nodeCardinalities); static JoinCardinalityEstimator make(const JoinReorderingContext& ctx, - const SingleTableAccessPlansResult& singleTablePlansRes, + const cost_based_ranker::EstimateMap& estimates, const SamplingEstimatorMap& samplingEstimators); /** @@ -70,10 +75,22 @@ public: const JoinReorderingContext& ctx, const SamplingEstimatorMap& samplingEstimators); static NodeCardinalities extractNodeCardinalities( - const JoinReorderingContext& ctx, const SingleTableAccessPlansResult& singleTablePlansRes); + const JoinReorderingContext& ctx, const cost_based_ranker::EstimateMap& estimates); + + /** + * Estimates the cardinality of a join plan over the given subset of nodes. 
This method + * constructs a spanning tree from the edges in the graph induced by 'nodes', and combines the + * edge selectivities, base table cardinalities, and single-table predicate selectivities to + * produce an estimate. Populates `_subsetCardinalities` with the result. + */ + cost_based_ranker::CardinalityEstimate getOrEstimateSubsetCardinality( + const JoinReorderingContext& ctx, const NodeSet& nodes); private: - EdgeSelectivities _edgeSelectivities; - NodeCardinalities _nodeCardinalities; + const EdgeSelectivities _edgeSelectivities; + const NodeCardinalities _nodeCardinalities; + + // Populated over the course of subset enumeration. + SubsetCardinalities _subsetCardinalities; }; } // namespace mongo::join_ordering diff --git a/src/mongo/db/query/compiler/optimizer/join/cardinality_estimator_join_test.cpp b/src/mongo/db/query/compiler/optimizer/join/cardinality_estimator_join_test.cpp index dbdb3ca0881..e519f378d69 100644 --- a/src/mongo/db/query/compiler/optimizer/join/cardinality_estimator_join_test.cpp +++ b/src/mongo/db/query/compiler/optimizer/join/cardinality_estimator_join_test.cpp @@ -181,22 +181,187 @@ TEST_F(JoinPredicateEstimatorFixture, ExtractNodeCardinalities) { const auto aCE = CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling}; const auto bCE = CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling}; - SingleTableAccessPlansResult singleTablePlansRes; + cost_based_ranker::EstimateMap estimates; { auto aPlan = makeCollScanPlan(aNss); - singleTablePlansRes.estimate[aPlan->root()] = {inCE, aCE}; - singleTablePlansRes.solns[graph.getNode(aNodeId).accessPath.get()] = std::move(aPlan); + estimates[aPlan->root()] = {inCE, aCE}; + ctx.cbrCqQsns[graph.getNode(aNodeId).accessPath.get()] = std::move(aPlan); } { auto bPlan = makeCollScanPlan(bNss); - singleTablePlansRes.estimate[bPlan->root()] = {inCE, bCE}; - singleTablePlansRes.solns[graph.getNode(bNodeId).accessPath.get()] = std::move(bPlan); + estimates[bPlan->root()] 
= {inCE, bCE}; + ctx.cbrCqQsns[graph.getNode(bNodeId).accessPath.get()] = std::move(bPlan); } - auto nodeCardinalities = - JoinCardinalityEstimator::extractNodeCardinalities(ctx, singleTablePlansRes); + auto nodeCardinalities = JoinCardinalityEstimator::extractNodeCardinalities(ctx, estimates); ASSERT_EQ(2U, nodeCardinalities.size()); ASSERT_EQ(aCE, nodeCardinalities[aNodeId]); ASSERT_EQ(bCE, nodeCardinalities[bNodeId]); } + +namespace { +void pushNNodes(JoinGraph& graph, size_t n) { + for (size_t i = 0; i < n; i++) { + auto nss = + NamespaceString::createNamespaceString_forTest("test", str::stream() << "nss" << i); + graph.addNode(nss, nullptr, boost::none); + } +} +} // namespace + +TEST_F(JoinPredicateEstimatorFixture, EstimateSubsetCardinality) { + // Construct 6 nodes, with single-table CEs that are multiples of 10. Node 0 will be ignored in + // the rest of the test; it is only there for easy math. + size_t numNodes = 6; + pushNNodes(graph, numNodes); + NodeCardinalities nodeCEs{oneCE, oneCE * 10, oneCE * 20, oneCE * 30, oneCE * 40, oneCE * 50}; + + /** + * Construct a graph like so + * 0 -- 1 -- 2 -- 3 + * / \ + * 4 -- 5 + * With edge selectivities that are multiples of 0.1. + * Note: There is one cycle here between nodes 3, 4, and 5. There are no other cycles (implicit + * or explicit). + */ + graph.addSimpleEqualityEdge(NodeId(0), NodeId(1), 0, 1); + graph.addSimpleEqualityEdge(NodeId(1), NodeId(2), 2, 3); + graph.addSimpleEqualityEdge(NodeId(2), NodeId(3), 4, 5); + graph.addSimpleEqualityEdge(NodeId(3), NodeId(4), 6, 7); + graph.addSimpleEqualityEdge(NodeId(4), NodeId(5), 7, 8); + graph.addSimpleEqualityEdge(NodeId(3), NodeId(5), 6, 8); + + EdgeSelectivities edgeSels; + for (size_t i = 0; i < numNodes; i++) { + edgeSels.push_back(cost_based_ranker::SelectivityEstimate(SelectivityType(i * 0.1), + EstimationSource::Sampling)); + } + + JoinCardinalityEstimator jce(edgeSels, nodeCEs); + { + // Cardinality for subset of size 1 is pulled directly from the CE map.
+ ASSERT_EQ(oneCE * 10, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1))); + ASSERT_EQ(oneCE * 20, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(2))); + ASSERT_EQ(oneCE * 30, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(3))); + ASSERT_EQ(oneCE * 40, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(4))); + ASSERT_EQ(oneCE * 50, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(5))); + } + { + // Connected sub-graph cardinality is a combo of single-table CEs and edge selectivities. + ASSERT_EQ(oneCE * 10 * 20 * 0.1, + jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1, 2))); + ASSERT_EQ(oneCE * 30 * 40 * 0.3, + jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(3, 4))); + + ASSERT_EQ(oneCE * 10 * 20 * 30 * 0.1 * 0.2, + jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1, 2, 3))); + ASSERT_EQ(oneCE * 20 * 30 * 40 * 0.2 * 0.3, + jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(2, 3, 4))); + + ASSERT_EQ(oneCE * 10 * 20 * 30 * 50 * 0.1 * 0.2 * 0.5, + jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1, 2, 3, 5))); + } + + { + // Disconnected sub-graph cardinality includes some cross-products. + ASSERT_EQ(oneCE * 20 * 40, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(2, 4))); + ASSERT_EQ(oneCE * 10 * 30 * 40 * 0.3, + jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1, 3, 4))); + } + + { + // TODO SERVER-115559: Adjust the assertions made here when we implement cycle breaking. + // Cycle cardinality estimation should not involve all edges in the cycle. + ASSERT_EQ(oneCE * 30 * 40 * 50 * 0.3 * 0.4 * 0.5, + jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(3, 4, 5))); + + ASSERT_EQ(oneCE * 10 * 20 * 30 * 40 * 50 * 0.1 * 0.2 * 0.3 * 0.4 * 0.5, + jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1, 2, 3, 4, 5))); + } +} + +// Similar to the test above, but verifies that path IDs are considered when determining the +// presence of cycles. 
+TEST_F(JoinPredicateEstimatorFixture, EstimateSubsetCardinalityAlmostCycle) { + size_t numNodes = 4; + pushNNodes(graph, numNodes); + NodeCardinalities nodeCEs{oneCE, oneCE * 10, oneCE * 20, oneCE * 30}; + + /** + * Construct a graph like so + * 0 -- 1 + * / \ + * 2 -- 3 + * Note: There is NO cycle here, because the path IDs chosen for the edges are different. + */ + graph.addSimpleEqualityEdge(NodeId(0), NodeId(1), 0, 1); + graph.addSimpleEqualityEdge(NodeId(1), NodeId(2), 2, 3); + graph.addSimpleEqualityEdge(NodeId(2), NodeId(3), 4, 5); + graph.addSimpleEqualityEdge(NodeId(1), NodeId(3), 6, 7); + + EdgeSelectivities edgeSels; + for (size_t i = 0; i < numNodes; i++) { + edgeSels.push_back(cost_based_ranker::SelectivityEstimate(SelectivityType(i * 0.1), + EstimationSource::Sampling)); + } + + JoinCardinalityEstimator jce(edgeSels, nodeCEs); + ASSERT_EQ(oneCE * 10 * 20 * 30 * 0.1 * 0.2 * 0.3, + jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1, 2, 3))); +} + +TEST_F(JoinPredicateEstimatorFixture, EstimateSubsetCardinalitySameCollectionPresentTwice) { + auto nssOne = NamespaceString::createNamespaceString_forTest("test", str::stream() << "nssOne"); + auto nssTwo = NamespaceString::createNamespaceString_forTest("test", str::stream() << "nssTwo"); + + std::string fieldNameA = str::stream() << "a" << 0; + auto filterBSONA = BSON(fieldNameA << BSON("$gt" << 0)); + + std::string fieldNameB = str::stream() << "b" << 0; + auto filterBSONB = BSON(fieldNameB << BSON("$gt" << 0)); + + // The first reference to nssOne has a filter on field "a". + auto cqA = makeCanonicalQuery(nssOne, filterBSONA); + ASSERT_TRUE(graph.addNode(nssOne, std::move(cqA), boost::none).has_value()); + + // The second reference to nssOne has a filter on field "b". This node will have larger CE. + auto cqB = makeCanonicalQuery(nssOne, filterBSONB); + ASSERT_TRUE(graph.addNode(nssOne, std::move(cqB), boost::none).has_value()); + + // Finally, there is a node in between for nssTwo. 
+ auto cqNssTwo = makeCanonicalQuery(nssTwo, filterBSONA); + ASSERT_TRUE(graph.addNode(nssTwo, std::move(cqNssTwo), boost::none).has_value()); + + // Finalize graph: + // 0 1 + // \ / + // 2 + graph.addSimpleEqualityEdge(NodeId(0), NodeId(2), 0, 1); + graph.addSimpleEqualityEdge(NodeId(1), NodeId(2), 2, 3); + EdgeSelectivities edgeSels; + for (size_t i = 0; i < 2; i++) { + edgeSels.push_back(cost_based_ranker::SelectivityEstimate(SelectivityType((i + 1) * 0.1), + EstimationSource::Sampling)); + } + NodeCardinalities nodeCEs{ + oneCE * 10, + oneCE * 20, + oneCE * 30, + }; + JoinCardinalityEstimator jce(edgeSels, nodeCEs); + + // Show that even though the namespace is the same for two of the nodes, we are able to + // correctly associate CE with the particular filters associated with those nodes. + ASSERT_EQ(oneCE * 10, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(0))); + ASSERT_EQ(oneCE * 20, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1))); + ASSERT_EQ(oneCE * 30, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(2))); + + ASSERT_EQ(oneCE * 10 * 20, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(0, 1))); + ASSERT_EQ(oneCE * 10 * 30 * 0.1, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(0, 2))); + ASSERT_EQ(oneCE * 20 * 30 * 0.2, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1, 2))); + + ASSERT_EQ(oneCE * 10 * 20 * 30 * 0.1 * 0.2, + jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(0, 1, 2))); +} } // namespace mongo::join_ordering diff --git a/src/mongo/db/query/compiler/optimizer/join/executor.cpp b/src/mongo/db/query/compiler/optimizer/join/executor.cpp index 9d095dc9d84..2dbb4345a4a 100644 --- a/src/mongo/db/query/compiler/optimizer/join/executor.cpp +++ b/src/mongo/db/query/compiler/optimizer/join/executor.cpp @@ -34,6 +34,7 @@ #include "mongo/db/pipeline/document_source.h" #include "mongo/db/pipeline/document_source_lookup.h" #include "mongo/db/query/compiler/optimizer/join/agg_join_model.h" +#include 
"mongo/db/query/compiler/optimizer/join/cardinality_estimator.h" #include "mongo/db/query/compiler/optimizer/join/join_reordering_context.h" #include "mongo/db/query/compiler/optimizer/join/reorder_joins.h" #include "mongo/db/query/compiler/optimizer/join/single_table_access.h" @@ -213,11 +214,14 @@ StatusWith getJoinReorderedExecutor( ReorderedJoinSolution reordered; switch (qkc.getJoinReorderMode()) { - case JoinReorderModeEnum::kBottomUp: + case JoinReorderModeEnum::kBottomUp: { // Optimize join order using bottom-up Sellinger-style algorithm. - reordered = constructSolutionBottomUp(std::move(ctx), - getPlanTreeShape(qkc.getJoinPlanTreeShape())); + JoinCardinalityEstimator estimator = JoinCardinalityEstimator::make( + ctx, swAccessPlans.getValue().estimate, samplingEstimators); + reordered = constructSolutionBottomUp( + std::move(ctx), std::move(estimator), getPlanTreeShape(qkc.getJoinPlanTreeShape())); break; + } case JoinReorderModeEnum::kRandom: // Randomly reorder joins. reordered = diff --git a/src/mongo/db/query/compiler/optimizer/join/plan_enumerator.cpp b/src/mongo/db/query/compiler/optimizer/join/plan_enumerator.cpp index a10a2eec148..ed37c5f2b9f 100644 --- a/src/mongo/db/query/compiler/optimizer/join/plan_enumerator.cpp +++ b/src/mongo/db/query/compiler/optimizer/join/plan_enumerator.cpp @@ -70,7 +70,7 @@ bool PlanEnumeratorContext::canPlanBeEnumerated(PlanTreeShape type, case PlanTreeShape::ZIG_ZAG: // We create a zig-zag plan by alternating which side we add a "base" join subset to. - // TODO SERVER-113059: Pick based on which side has smaller CE. + // TODO SERVER-115147: Pick based on which side has smaller CE. if (left.isBaseCollectionAccess() && right.isBaseCollectionAccess()) { /** * We always allow a join like this as a base case: @@ -162,7 +162,6 @@ void PlanEnumeratorContext::addJoinPlan(PlanTreeShape type, return; } } else { - // TODO SERVER-113059: Rudimentary cost metric/tracking. 
subset.plans.push_back( _registry.registerJoinNode(subset, method, left.bestPlan(), right.bestPlan())); } diff --git a/src/mongo/db/query/compiler/optimizer/join/plan_enumerator.h b/src/mongo/db/query/compiler/optimizer/join/plan_enumerator.h index 59583d72b03..6fb44a141c2 100644 --- a/src/mongo/db/query/compiler/optimizer/join/plan_enumerator.h +++ b/src/mongo/db/query/compiler/optimizer/join/plan_enumerator.h @@ -29,6 +29,7 @@ #pragma once +#include "mongo/db/query/compiler/optimizer/join/cardinality_estimator.h" #include "mongo/db/query/compiler/optimizer/join/join_plan.h" #include "mongo/db/query/compiler/optimizer/join/join_reordering_context.h" #include "mongo/util/modules.h" @@ -46,7 +47,9 @@ enum class PlanTreeShape { LEFT_DEEP, RIGHT_DEEP, ZIG_ZAG }; */ class PlanEnumeratorContext { public: - PlanEnumeratorContext(const JoinReorderingContext& ctx) : _ctx{ctx} {} + PlanEnumeratorContext(const JoinReorderingContext& ctx, + const JoinCardinalityEstimator& estimator) + : _ctx{ctx}, _estimator(estimator) {} // Delete copy and move operations to prevent issues with copying '_joinGraph'. PlanEnumeratorContext(const PlanEnumeratorContext&) = delete; @@ -115,6 +118,7 @@ private: const JoinSubset& subset) const; const JoinReorderingContext& _ctx; + const JoinCardinalityEstimator& _estimator; // Hold intermediate results of the enumeration algorithm. The index into the outer vector // represents the "level". 
The i'th level contains solutions for the optimal way to join all diff --git a/src/mongo/db/query/compiler/optimizer/join/plan_enumerator_test.cpp b/src/mongo/db/query/compiler/optimizer/join/plan_enumerator_test.cpp index b3128f152a7..e24173d0562 100644 --- a/src/mongo/db/query/compiler/optimizer/join/plan_enumerator_test.cpp +++ b/src/mongo/db/query/compiler/optimizer/join/plan_enumerator_test.cpp @@ -29,6 +29,7 @@ #include "mongo/db/query/compiler/optimizer/join/plan_enumerator.h" +#include "mongo/db/query/compiler/optimizer/join/cardinality_estimator.h" #include "mongo/db/query/compiler/optimizer/join/plan_enumerator_helpers.h" #include "mongo/db/query/compiler/optimizer/join/unit_test_helpers.h" #include "mongo/unittest/death_test.h" @@ -119,7 +120,7 @@ public: } } - PlanEnumeratorContext ctx{jCtx}; + PlanEnumeratorContext ctx{jCtx, emptyEstimator}; ctx.enumerateJoinSubsets(shape); ASSERT_EQ(numNodes, ctx.getSubsets(0).size()); for (size_t k = 1; k < numNodes; ++k) { @@ -137,6 +138,8 @@ public: goldenCtx->outStream() << ctx.toString() << std::endl; } } + + JoinCardinalityEstimator emptyEstimator = JoinCardinalityEstimator({}, {}); }; TEST_F(JoinPlanEnumeratorTest, InitializeSubsetsTwo) { @@ -146,7 +149,7 @@ TEST_F(JoinPlanEnumeratorTest, InitializeSubsetsTwo) { graph.addSimpleEqualityEdge((NodeId)0, (NodeId)1, 0, 1); { - PlanEnumeratorContext ctx{jCtx}; + PlanEnumeratorContext ctx{jCtx, emptyEstimator}; ctx.enumerateJoinSubsets(PlanTreeShape::LEFT_DEEP); auto& level0 = ctx.getSubsets(0); @@ -163,7 +166,7 @@ TEST_F(JoinPlanEnumeratorTest, InitializeSubsetsTwo) { } { - PlanEnumeratorContext ctx{jCtx}; + PlanEnumeratorContext ctx{jCtx, emptyEstimator}; ctx.enumerateJoinSubsets(PlanTreeShape::RIGHT_DEEP); auto& level0 = ctx.getSubsets(0); @@ -189,7 +192,7 @@ TEST_F(JoinPlanEnumeratorTest, InitializeSubsetsThree) { graph.addSimpleEqualityEdge(NodeId(1), NodeId(2), 1, 2); { - PlanEnumeratorContext ctx{jCtx}; + PlanEnumeratorContext ctx{jCtx, emptyEstimator}; 
ctx.enumerateJoinSubsets(PlanTreeShape::LEFT_DEEP); auto& level0 = ctx.getSubsets(0); @@ -213,7 +216,7 @@ TEST_F(JoinPlanEnumeratorTest, InitializeSubsetsThree) { } { - PlanEnumeratorContext ctx{jCtx}; + PlanEnumeratorContext ctx{jCtx, emptyEstimator}; ctx.enumerateJoinSubsets(PlanTreeShape::RIGHT_DEEP); auto& level0 = ctx.getSubsets(0); @@ -245,7 +248,7 @@ TEST_F(JoinPlanEnumeratorTest, InitializeSubsetsThreeNoCycle) { graph.addSimpleEqualityEdge(NodeId(0), NodeId(2), 0, 2); { - PlanEnumeratorContext ctx{jCtx}; + PlanEnumeratorContext ctx{jCtx, emptyEstimator}; ctx.enumerateJoinSubsets(PlanTreeShape::LEFT_DEEP); auto& level0 = ctx.getSubsets(0); @@ -269,7 +272,7 @@ TEST_F(JoinPlanEnumeratorTest, InitializeSubsetsThreeNoCycle) { } { - PlanEnumeratorContext ctx{jCtx}; + PlanEnumeratorContext ctx{jCtx, emptyEstimator}; ctx.enumerateJoinSubsets(PlanTreeShape::RIGHT_DEEP); auto& level0 = ctx.getSubsets(0); diff --git a/src/mongo/db/query/compiler/optimizer/join/reorder_joins.cpp b/src/mongo/db/query/compiler/optimizer/join/reorder_joins.cpp index f65f2e7f188..516791e78f7 100644 --- a/src/mongo/db/query/compiler/optimizer/join/reorder_joins.cpp +++ b/src/mongo/db/query/compiler/optimizer/join/reorder_joins.cpp @@ -380,8 +380,9 @@ ReorderedJoinSolution constructSolutionWithRandomOrder(const JoinReorderingConte } ReorderedJoinSolution constructSolutionBottomUp(const JoinReorderingContext& ctx, + JoinCardinalityEstimator estimator, PlanTreeShape shape) { - PlanEnumeratorContext peCtx(ctx); + PlanEnumeratorContext peCtx(ctx, estimator); peCtx.enumerateJoinSubsets(shape); auto bestPlanNodeId = peCtx.getBestFinalPlan(); diff --git a/src/mongo/db/query/compiler/optimizer/join/reorder_joins.h b/src/mongo/db/query/compiler/optimizer/join/reorder_joins.h index 17f81ecb344..2bfdadf002f 100644 --- a/src/mongo/db/query/compiler/optimizer/join/reorder_joins.h +++ b/src/mongo/db/query/compiler/optimizer/join/reorder_joins.h @@ -28,6 +28,7 @@ */ #pragma once +#include 
"mongo/db/query/compiler/optimizer/join/cardinality_estimator.h" #include "mongo/db/query/compiler/optimizer/join/join_reordering_context.h" #include "mongo/db/query/compiler/optimizer/join/plan_enumerator.h" #include "mongo/util/modules.h" @@ -56,6 +57,7 @@ ReorderedJoinSolution constructSolutionWithRandomOrder(const JoinReorderingConte * Sellinger-style join optimization. */ ReorderedJoinSolution constructSolutionBottomUp(const JoinReorderingContext& ctx, + JoinCardinalityEstimator estimator, PlanTreeShape shape); } // namespace mongo::join_ordering