SERVER-115144: Remove table cardinality as input to JoinPredEstimator (#44954)

GitOrigin-RevId: f44de95cdd0bb92c29cd67bebeae941ea264fa50
This commit is contained in:
HanaPearlman 2025-12-10 13:48:02 -05:00 committed by MongoDB Bot
parent aa60d990b7
commit d83ce90ff5
7 changed files with 35 additions and 46 deletions

View File

@ -99,6 +99,8 @@ public:
* Does not support estimating NDV over array-valued fields.
*/
virtual CardinalityEstimate estimateNDV(const std::vector<FieldPath>& fieldNames) const = 0;
/**
 * Returns the collection cardinality known to this estimator, as a double.
 */
virtual double getCollCard() const = 0;
};
} // namespace mongo::ce

View File

@ -240,6 +240,10 @@ public:
}
}
/*
 * Reports the collection cardinality stored in '_collectionCard', converted to a double.
 */
double getCollCard() const override {
    const double collCard = _collectionCard.toDouble();
    return collCard;
}
protected:
/*
* This helper creates a CanonicalQuery for the sampling plan. This CanonicalQuery is empty
@ -250,10 +254,6 @@ protected:
static std::unique_ptr<CanonicalQuery> makeEmptyCanonicalQuery(const NamespaceString& nss,
OperationContext* opCtx);
double getCollCard() const {
return _collectionCard.cardinality().v();
}
/*
* The sample size is calculated based on the confidence level and margin of error (MoE)
* required. n = Z^2 / W^2

View File

@ -58,10 +58,6 @@ public:
return SamplingEstimatorImpl::calculateSampleSize(ci, marginOfError);
}
double getCollCard() {
return SamplingEstimatorImpl::getCollCard();
}
static bool matches(const OrderedIntervalList& oil, BSONElement val) {
return SamplingEstimatorImpl::matches(oil, val);
}

View File

@ -77,12 +77,8 @@ uint64_t combinations(int n, int k) {
JoinPredicateEstimator::JoinPredicateEstimator(const JoinGraph& graph,
const std::vector<ResolvedPath>& resolvedPaths,
const SamplingEstimatorMap& samplingEstimators,
const BaseTableCardinalityMap& tableCards)
: _graph(graph),
_resolvedPaths(resolvedPaths),
_samplingEstimators(samplingEstimators),
_tableCards(tableCards) {}
const SamplingEstimatorMap& samplingEstimators)
: _graph(graph), _resolvedPaths(resolvedPaths), _samplingEstimators(samplingEstimators) {}
// This function makes a number of assumptions:
// * Join predicates are independent from single table predicates. This allows us to estimate them
@ -133,8 +129,8 @@ cost_based_ranker::SelectivityEstimate JoinPredicateEstimator::joinPredicateSel(
// Extract the cardinality estimates for left and right nodes before single table predicates are
// applied.
auto leftCard = _tableCards.at(leftNode.collectionName);
auto rightCard = _tableCards.at(rightNode.collectionName);
auto leftCard = _samplingEstimators.at(leftNode.collectionName)->getCollCard();
auto rightCard = _samplingEstimators.at(rightNode.collectionName)->getCollCard();
// For the purposes of estimation, we assume that this edge represents a "primary key" to
// "foreign key" join, despite these concepts not existing in MongoDB. We also assume that the

View File

@ -69,9 +69,6 @@ private:
*/
uint64_t combinations(int n, int k);
using BaseTableCardinalityMap =
stdx::unordered_map<NamespaceString, cost_based_ranker::CardinalityEstimate>;
/**
* Container for all objects necessary to estimate the selectivity of join predicates.
*/
@ -79,8 +76,7 @@ class JoinPredicateEstimator {
public:
JoinPredicateEstimator(const JoinGraph& graph,
const std::vector<ResolvedPath>& resolvedPaths,
const SamplingEstimatorMap& samplingEstimators,
const BaseTableCardinalityMap& tableCards);
const SamplingEstimatorMap& samplingEstimators);
/**
* Returns an estimate of the selectivity of the given 'JoinEdge' using sampling.
@ -91,7 +87,6 @@ private:
const JoinGraph& _graph;
const std::vector<ResolvedPath>& _resolvedPaths;
const SamplingEstimatorMap& _samplingEstimators;
const BaseTableCardinalityMap& _tableCards;
};
} // namespace mongo::join_ordering

View File

@ -333,17 +333,15 @@ TEST_F(JoinPredicateEstimatorFixture, NDVSmallerCollection) {
graph.addSimpleEqualityEdge(aNodeId, bNodeId, 0, 1);
SamplingEstimatorMap samplingEstimators;
auto aSamplingEstimator = std::make_unique<FakeNdvEstimator>();
auto aSamplingEstimator = std::make_unique<FakeNdvEstimator>(
CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling});
aSamplingEstimator->addFakeNDVEstimate(
{FieldPath("foo")}, CardinalityEstimate{CardinalityType{5}, EstimationSource::Sampling});
samplingEstimators[aNss] = std::move(aSamplingEstimator);
samplingEstimators[bNss] = std::make_unique<FakeNdvEstimator>();
samplingEstimators[bNss] = std::make_unique<FakeNdvEstimator>(
CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling});
BaseTableCardinalityMap tableCards;
tableCards.emplace(aNss, CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling});
tableCards.emplace(bNss, CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling});
JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators, tableCards};
JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators};
auto selEst = predEstimator.joinPredicateSel(graph.getEdge(0));
// The selectivity estimate comes from 1 / NDV(A.foo) = 1 / 5 = 0.2
@ -371,19 +369,16 @@ TEST_F(JoinPredicateEstimatorFixture, NDVSmallerCollectionEmbedPath) {
graph.addSimpleEqualityEdge(aNodeId, bNodeId, 0, 1);
SamplingEstimatorMap samplingEstimators;
samplingEstimators[aNss] = std::make_unique<FakeNdvEstimator>();
// Only add fake estimates for "b" estimator
auto bSamplingEstimator = std::make_unique<FakeNdvEstimator>();
samplingEstimators[aNss] = std::make_unique<FakeNdvEstimator>(
CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling});
// Ensure "b" collection has smaller CE. Only add fake estimates for "b" estimator.
auto bSamplingEstimator = std::make_unique<FakeNdvEstimator>(
CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling});
bSamplingEstimator->addFakeNDVEstimate(
{FieldPath("foo")}, CardinalityEstimate{CardinalityType{5}, EstimationSource::Sampling});
samplingEstimators[bNss] = std::move(bSamplingEstimator);
BaseTableCardinalityMap tableCards;
tableCards.emplace(aNss, CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling});
// Ensure "b" collection has smaller CE
tableCards.emplace(bNss, CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling});
JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators, tableCards};
JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators};
auto selEst = predEstimator.joinPredicateSel(graph.getEdge(0));
// The selectivity estimate comes from 1 / NDV(B.foo) = 1 / 5 = 0.2
@ -414,8 +409,9 @@ TEST_F(JoinPredicateEstimatorFixture, NDVCompoundJoinKey) {
graph.addSimpleEqualityEdge(aNodeId, bNodeId, 2, 3);
SamplingEstimatorMap samplingEstimators;
auto aSamplingEstimator = std::make_unique<FakeNdvEstimator>();
// We shoudl end up using the NDV from (foo, bar) and not from foo or bar.
auto aSamplingEstimator = std::make_unique<FakeNdvEstimator>(
CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling});
// We should end up using the NDV from (foo, bar) and not from foo or bar.
aSamplingEstimator->addFakeNDVEstimate(
{FieldPath("foo"), FieldPath("bar")},
CardinalityEstimate{CardinalityType{5}, EstimationSource::Sampling});
@ -424,13 +420,10 @@ TEST_F(JoinPredicateEstimatorFixture, NDVCompoundJoinKey) {
aSamplingEstimator->addFakeNDVEstimate(
{FieldPath("bar")}, CardinalityEstimate{CardinalityType{3}, EstimationSource::Sampling});
samplingEstimators[aNss] = std::move(aSamplingEstimator);
samplingEstimators[bNss] = std::make_unique<FakeNdvEstimator>();
samplingEstimators[bNss] = std::make_unique<FakeNdvEstimator>(
CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling});
BaseTableCardinalityMap tableCards;
tableCards.emplace(aNss, CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling});
tableCards.emplace(bNss, CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling});
JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators, tableCards};
JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators};
auto selEst = predEstimator.joinPredicateSel(graph.getEdge(0));
// The selectivity estimate comes from 1 / NDV(A.foo, A.bar) = 1 / 5 = 0.2

View File

@ -73,6 +73,8 @@ using namespace cost_based_ranker;
*/
class FakeNdvEstimator : public ce::SamplingEstimator {
public:
FakeNdvEstimator(CardinalityEstimate collCard) : _collCard(collCard) {};
/*
 * Not implemented by this fake: the body consists solely of MONGO_UNREACHABLE, so this
 * overload is not expected to be called.
 */
CardinalityEstimate estimateCardinality(const MatchExpression* expr) const override {
MONGO_UNREACHABLE;
}
@ -115,7 +117,12 @@ public:
return _fakeEstimates.at(fieldNames);
}
double getCollCard() const override {
return _collCard.toDouble();
}
private:
CardinalityEstimate _collCard;
stdx::unordered_map<std::vector<FieldPath>, CardinalityEstimate> _fakeEstimates;
};