diff --git a/src/mongo/db/query/compiler/ce/sampling/sampling_estimator.h b/src/mongo/db/query/compiler/ce/sampling/sampling_estimator.h index d22c4cc9fca..b90ce52c257 100644 --- a/src/mongo/db/query/compiler/ce/sampling/sampling_estimator.h +++ b/src/mongo/db/query/compiler/ce/sampling/sampling_estimator.h @@ -99,6 +99,8 @@ public: * Does not support estimating NDV over array-valued fields. */ virtual CardinalityEstimate estimateNDV(const std::vector& fieldNames) const = 0; + + virtual double getCollCard() const = 0; }; } // namespace mongo::ce diff --git a/src/mongo/db/query/compiler/ce/sampling/sampling_estimator_impl.h b/src/mongo/db/query/compiler/ce/sampling/sampling_estimator_impl.h index 39309f5a9eb..9bd1774abf3 100644 --- a/src/mongo/db/query/compiler/ce/sampling/sampling_estimator_impl.h +++ b/src/mongo/db/query/compiler/ce/sampling/sampling_estimator_impl.h @@ -240,6 +240,10 @@ public: } } + double getCollCard() const override { + return _collectionCard.toDouble(); + } + protected: /* * This helper creates a CanonicalQuery for the sampling plan. This CanonicalQuery is “empty” @@ -250,10 +254,6 @@ protected: static std::unique_ptr makeEmptyCanonicalQuery(const NamespaceString& nss, OperationContext* opCtx); - double getCollCard() const { - return _collectionCard.cardinality().v(); - } - /* * The sample size is calculated based on the confidence level and margin of error(MoE) * required. n = Z^2 / W^2 diff --git a/src/mongo/db/query/compiler/ce/sampling/sampling_test_utils.h b/src/mongo/db/query/compiler/ce/sampling/sampling_test_utils.h index 22a46fbac91..6227c6d3f41 100644 --- a/src/mongo/db/query/compiler/ce/sampling/sampling_test_utils.h +++ b/src/mongo/db/query/compiler/ce/sampling/sampling_test_utils.h @@ -58,10 +58,6 @@ public: return SamplingEstimatorImpl::calculateSampleSize(ci, marginOfError); } - double getCollCard() { - return SamplingEstimatorImpl::getCollCard(); - } - static bool matches(const OrderedIntervalList& oil, BSONElement val) { return SamplingEstimatorImpl::matches(oil, val); } diff --git a/src/mongo/db/query/compiler/optimizer/join/plan_enumerator_helpers.cpp b/src/mongo/db/query/compiler/optimizer/join/plan_enumerator_helpers.cpp index dfaa330f47b..fb73003e566 100644 --- a/src/mongo/db/query/compiler/optimizer/join/plan_enumerator_helpers.cpp +++ b/src/mongo/db/query/compiler/optimizer/join/plan_enumerator_helpers.cpp @@ -77,12 +77,8 @@ uint64_t combinations(int n, int k) { JoinPredicateEstimator::JoinPredicateEstimator(const JoinGraph& graph, const std::vector& resolvedPaths, - const SamplingEstimatorMap& samplingEstimators, - const BaseTableCardinalityMap& tableCards) - : _graph(graph), - _resolvedPaths(resolvedPaths), - _samplingEstimators(samplingEstimators), - _tableCards(tableCards) {} + const SamplingEstimatorMap& samplingEstimators) + : _graph(graph), _resolvedPaths(resolvedPaths), _samplingEstimators(samplingEstimators) {} // This function makes a number of assumptions: // * Join predicate are independent from single table predicates. This allows us to estimate them @@ -133,8 +129,8 @@ cost_based_ranker::SelectivityEstimate JoinPredicateEstimator::joinPredicateSel( // Extract the cardinality estimates for left and right nodes before single table predicates are // applied. - auto leftCard = _tableCards.at(leftNode.collectionName); - auto rightCard = _tableCards.at(rightNode.collectionName); + auto leftCard = _samplingEstimators.at(leftNode.collectionName)->getCollCard(); + auto rightCard = _samplingEstimators.at(rightNode.collectionName)->getCollCard(); // For the purposes of estimation, we assume that this edge represents a "primary key" to // "foreign key" join, despite these concepts not existing in MongoDB. We also assume that the diff --git a/src/mongo/db/query/compiler/optimizer/join/plan_enumerator_helpers.h b/src/mongo/db/query/compiler/optimizer/join/plan_enumerator_helpers.h index 51da4432392..18a4db97852 100644 --- a/src/mongo/db/query/compiler/optimizer/join/plan_enumerator_helpers.h +++ b/src/mongo/db/query/compiler/optimizer/join/plan_enumerator_helpers.h @@ -69,9 +69,6 @@ private: */ uint64_t combinations(int n, int k); -using BaseTableCardinalityMap = - stdx::unordered_map; - /** * Container for all objects necessary to estimate the selectivity of join predicates. */ @@ -79,8 +76,7 @@ class JoinPredicateEstimator { public: JoinPredicateEstimator(const JoinGraph& graph, const std::vector& resolvedPaths, - const SamplingEstimatorMap& samplingEstimators, - const BaseTableCardinalityMap& tableCards); + const SamplingEstimatorMap& samplingEstimators); /** * Returns an estimate of the selectivity of the given 'JoinEdge' using sampling. @@ -91,7 +87,6 @@ private: const JoinGraph& _graph; const std::vector& _resolvedPaths; const SamplingEstimatorMap& _samplingEstimators; - const BaseTableCardinalityMap& _tableCards; }; } // namespace mongo::join_ordering diff --git a/src/mongo/db/query/compiler/optimizer/join/plan_enumerator_test.cpp b/src/mongo/db/query/compiler/optimizer/join/plan_enumerator_test.cpp index 1029e37d0ee..24d74295329 100644 --- a/src/mongo/db/query/compiler/optimizer/join/plan_enumerator_test.cpp +++ b/src/mongo/db/query/compiler/optimizer/join/plan_enumerator_test.cpp @@ -333,17 +333,15 @@ TEST_F(JoinPredicateEstimatorFixture, NDVSmallerCollection) { graph.addSimpleEqualityEdge(aNodeId, bNodeId, 0, 1); SamplingEstimatorMap samplingEstimators; - auto aSamplingEstimator = std::make_unique(); + auto aSamplingEstimator = std::make_unique( + CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling}); aSamplingEstimator->addFakeNDVEstimate( {FieldPath("foo")}, CardinalityEstimate{CardinalityType{5}, EstimationSource::Sampling}); samplingEstimators[aNss] = std::move(aSamplingEstimator); - samplingEstimators[bNss] = std::make_unique(); + samplingEstimators[bNss] = std::make_unique( + CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling}); - BaseTableCardinalityMap tableCards; - tableCards.emplace(aNss, CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling}); - tableCards.emplace(bNss, CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling}); - - JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators, tableCards}; + JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators}; auto selEst = predEstimator.joinPredicateSel(graph.getEdge(0)); // The selectivity estimate comes from 1 / NDV(A.foo) = 1 / 5 = 0.2 @@ -371,19 +369,16 @@ TEST_F(JoinPredicateEstimatorFixture, NDVSmallerCollectionEmbedPath) { graph.addSimpleEqualityEdge(aNodeId, bNodeId, 0, 1); SamplingEstimatorMap samplingEstimators; - samplingEstimators[aNss] = std::make_unique(); - // Only add fake estimates for "b" estimator - auto bSamplingEstimator = std::make_unique(); + samplingEstimators[aNss] = std::make_unique( + CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling}); + // Ensure "b" collection has smaller CE. Only add fake estimates for "b" estimator. + auto bSamplingEstimator = std::make_unique( + CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling}); bSamplingEstimator->addFakeNDVEstimate( {FieldPath("foo")}, CardinalityEstimate{CardinalityType{5}, EstimationSource::Sampling}); samplingEstimators[bNss] = std::move(bSamplingEstimator); - BaseTableCardinalityMap tableCards; - tableCards.emplace(aNss, CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling}); - // Ensure "b" collection has smaller CE - tableCards.emplace(bNss, CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling}); - - JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators, tableCards}; + JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators}; auto selEst = predEstimator.joinPredicateSel(graph.getEdge(0)); // The selectivity estimate comes from 1 / NDV(B.foo) = 1 / 5 = 0.2 @@ -414,8 +409,9 @@ TEST_F(JoinPredicateEstimatorFixture, NDVCompoundJoinKey) { graph.addSimpleEqualityEdge(aNodeId, bNodeId, 2, 3); SamplingEstimatorMap samplingEstimators; - auto aSamplingEstimator = std::make_unique(); - // We shoudl end up using the NDV from (foo, bar) and not from foo or bar. + auto aSamplingEstimator = std::make_unique( + CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling}); + // We should end up using the NDV from (foo, bar) and not from foo or bar. aSamplingEstimator->addFakeNDVEstimate( {FieldPath("foo"), FieldPath("bar")}, CardinalityEstimate{CardinalityType{5}, EstimationSource::Sampling}); @@ -424,13 +420,10 @@ TEST_F(JoinPredicateEstimatorFixture, NDVCompoundJoinKey) { aSamplingEstimator->addFakeNDVEstimate( {FieldPath("bar")}, CardinalityEstimate{CardinalityType{3}, EstimationSource::Sampling}); samplingEstimators[aNss] = std::move(aSamplingEstimator); - samplingEstimators[bNss] = std::make_unique(); + samplingEstimators[bNss] = std::make_unique( + CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling}); - BaseTableCardinalityMap tableCards; - tableCards.emplace(aNss, CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling}); - tableCards.emplace(bNss, CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling}); - - JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators, tableCards}; + JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators}; auto selEst = predEstimator.joinPredicateSel(graph.getEdge(0)); // The selectivity estimate comes from 1 / NDV(A.foo, A.bar) = 1 / 5 = 0.2 diff --git a/src/mongo/db/query/compiler/optimizer/join/unit_test_helpers.h b/src/mongo/db/query/compiler/optimizer/join/unit_test_helpers.h index 37cd7dbac51..3df8da4a718 100644 --- a/src/mongo/db/query/compiler/optimizer/join/unit_test_helpers.h +++ b/src/mongo/db/query/compiler/optimizer/join/unit_test_helpers.h @@ -73,6 +73,8 @@ using namespace cost_based_ranker; */ class FakeNdvEstimator : public ce::SamplingEstimator { public: + FakeNdvEstimator(CardinalityEstimate collCard) : _collCard(collCard) {}; + CardinalityEstimate estimateCardinality(const MatchExpression* expr) const override { MONGO_UNREACHABLE; } @@ -115,7 +117,12 @@ public: return _fakeEstimates.at(fieldNames); } + double getCollCard() const override { + return _collCard.toDouble(); + } + private: + CardinalityEstimate _collCard; stdx::unordered_map, CardinalityEstimate> _fakeEstimates; };