mirror of https://github.com/mongodb/mongo
SERVER-115144: Remove table cardinality as input to JoinPredEstimator (#44954)
GitOrigin-RevId: f44de95cdd0bb92c29cd67bebeae941ea264fa50
This commit is contained in:
parent
aa60d990b7
commit
d83ce90ff5
|
|
@ -99,6 +99,8 @@ public:
|
|||
* Does not support estimating NDV over array-valued fields.
|
||||
*/
|
||||
virtual CardinalityEstimate estimateNDV(const std::vector<FieldPath>& fieldNames) const = 0;
|
||||
|
||||
/**
 * Returns the collection cardinality known to this estimator, as a double.
 */
virtual double getCollCard() const = 0;
|
||||
};
|
||||
|
||||
} // namespace mongo::ce
|
||||
|
|
|
|||
|
|
@ -240,6 +240,10 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the collection cardinality held in '_collectionCard', converted to a double via
 * toDouble().
 */
double getCollCard() const override {
|
||||
return _collectionCard.toDouble();
|
||||
}
|
||||
|
||||
protected:
|
||||
/*
|
||||
* This helper creates a CanonicalQuery for the sampling plan. This CanonicalQuery is “empty”
|
||||
|
|
@ -250,10 +254,6 @@ protected:
|
|||
static std::unique_ptr<CanonicalQuery> makeEmptyCanonicalQuery(const NamespaceString& nss,
|
||||
OperationContext* opCtx);
|
||||
|
||||
/**
 * Returns the value of '_collectionCard' obtained through cardinality().v(), as a double.
 */
double getCollCard() const {
|
||||
return _collectionCard.cardinality().v();
|
||||
}
|
||||
|
||||
/*
|
||||
* The sample size is calculated based on the confidence level and margin of error (MoE)
|
||||
* required. n = Z^2 / W^2
|
||||
|
|
|
|||
|
|
@ -58,10 +58,6 @@ public:
|
|||
return SamplingEstimatorImpl::calculateSampleSize(ci, marginOfError);
|
||||
}
|
||||
|
||||
/**
 * Forwards to SamplingEstimatorImpl::getCollCard() and returns its result unchanged.
 * NOTE(review): presumably exists to expose the base-class member through this wrapper's public
 * section -- confirm against the enclosing class.
 */
double getCollCard() {
|
||||
return SamplingEstimatorImpl::getCollCard();
|
||||
}
|
||||
|
||||
/**
 * Forwards 'oil' and 'val' to SamplingEstimatorImpl::matches() and returns its result unchanged.
 */
static bool matches(const OrderedIntervalList& oil, BSONElement val) {
|
||||
return SamplingEstimatorImpl::matches(oil, val);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -77,12 +77,8 @@ uint64_t combinations(int n, int k) {
|
|||
|
||||
JoinPredicateEstimator::JoinPredicateEstimator(const JoinGraph& graph,
|
||||
const std::vector<ResolvedPath>& resolvedPaths,
|
||||
const SamplingEstimatorMap& samplingEstimators,
|
||||
const BaseTableCardinalityMap& tableCards)
|
||||
: _graph(graph),
|
||||
_resolvedPaths(resolvedPaths),
|
||||
_samplingEstimators(samplingEstimators),
|
||||
_tableCards(tableCards) {}
|
||||
const SamplingEstimatorMap& samplingEstimators)
|
||||
: _graph(graph), _resolvedPaths(resolvedPaths), _samplingEstimators(samplingEstimators) {}
|
||||
|
||||
// This function makes a number of assumptions:
|
||||
// * Join predicate are independent from single table predicates. This allows us to estimate them
|
||||
|
|
@ -133,8 +129,8 @@ cost_based_ranker::SelectivityEstimate JoinPredicateEstimator::joinPredicateSel(
|
|||
|
||||
// Extract the cardinality estimates for left and right nodes before single table predicates are
|
||||
// applied.
|
||||
auto leftCard = _tableCards.at(leftNode.collectionName);
|
||||
auto rightCard = _tableCards.at(rightNode.collectionName);
|
||||
auto leftCard = _samplingEstimators.at(leftNode.collectionName)->getCollCard();
|
||||
auto rightCard = _samplingEstimators.at(rightNode.collectionName)->getCollCard();
|
||||
|
||||
// For the purposes of estimation, we assume that this edge represents a "primary key" to
|
||||
// "foreign key" join, despite these concepts not existing in MongoDB. We also assume that the
|
||||
|
|
|
|||
|
|
@ -69,9 +69,6 @@ private:
|
|||
*/
|
||||
uint64_t combinations(int n, int k);
|
||||
|
||||
/**
 * Maps a collection's NamespaceString to a CardinalityEstimate for that collection.
 */
using BaseTableCardinalityMap =
|
||||
stdx::unordered_map<NamespaceString, cost_based_ranker::CardinalityEstimate>;
|
||||
|
||||
/**
|
||||
* Container for all objects necessary to estimate the selectivity of join predicates.
|
||||
*/
|
||||
|
|
@ -79,8 +76,7 @@ class JoinPredicateEstimator {
|
|||
public:
|
||||
JoinPredicateEstimator(const JoinGraph& graph,
|
||||
const std::vector<ResolvedPath>& resolvedPaths,
|
||||
const SamplingEstimatorMap& samplingEstimators,
|
||||
const BaseTableCardinalityMap& tableCards);
|
||||
const SamplingEstimatorMap& samplingEstimators);
|
||||
|
||||
/**
|
||||
* Returns an estimate of the selectivity of the given 'JoinEdge' using sampling.
|
||||
|
|
@ -91,7 +87,6 @@ private:
|
|||
const JoinGraph& _graph;
|
||||
const std::vector<ResolvedPath>& _resolvedPaths;
|
||||
const SamplingEstimatorMap& _samplingEstimators;
|
||||
const BaseTableCardinalityMap& _tableCards;
|
||||
};
|
||||
|
||||
} // namespace mongo::join_ordering
|
||||
|
|
|
|||
|
|
@ -333,17 +333,15 @@ TEST_F(JoinPredicateEstimatorFixture, NDVSmallerCollection) {
|
|||
graph.addSimpleEqualityEdge(aNodeId, bNodeId, 0, 1);
|
||||
|
||||
SamplingEstimatorMap samplingEstimators;
|
||||
auto aSamplingEstimator = std::make_unique<FakeNdvEstimator>();
|
||||
auto aSamplingEstimator = std::make_unique<FakeNdvEstimator>(
|
||||
CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling});
|
||||
aSamplingEstimator->addFakeNDVEstimate(
|
||||
{FieldPath("foo")}, CardinalityEstimate{CardinalityType{5}, EstimationSource::Sampling});
|
||||
samplingEstimators[aNss] = std::move(aSamplingEstimator);
|
||||
samplingEstimators[bNss] = std::make_unique<FakeNdvEstimator>();
|
||||
samplingEstimators[bNss] = std::make_unique<FakeNdvEstimator>(
|
||||
CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling});
|
||||
|
||||
BaseTableCardinalityMap tableCards;
|
||||
tableCards.emplace(aNss, CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling});
|
||||
tableCards.emplace(bNss, CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling});
|
||||
|
||||
JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators, tableCards};
|
||||
JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators};
|
||||
|
||||
auto selEst = predEstimator.joinPredicateSel(graph.getEdge(0));
|
||||
// The selectivity estimate comes from 1 / NDV(A.foo) = 1 / 5 = 0.2
|
||||
|
|
@ -371,19 +369,16 @@ TEST_F(JoinPredicateEstimatorFixture, NDVSmallerCollectionEmbedPath) {
|
|||
graph.addSimpleEqualityEdge(aNodeId, bNodeId, 0, 1);
|
||||
|
||||
SamplingEstimatorMap samplingEstimators;
|
||||
samplingEstimators[aNss] = std::make_unique<FakeNdvEstimator>();
|
||||
// Only add fake estimates for "b" estimator
|
||||
auto bSamplingEstimator = std::make_unique<FakeNdvEstimator>();
|
||||
samplingEstimators[aNss] = std::make_unique<FakeNdvEstimator>(
|
||||
CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling});
|
||||
// Ensure "b" collection has smaller CE. Only add fake estimates for "b" estimator.
|
||||
auto bSamplingEstimator = std::make_unique<FakeNdvEstimator>(
|
||||
CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling});
|
||||
bSamplingEstimator->addFakeNDVEstimate(
|
||||
{FieldPath("foo")}, CardinalityEstimate{CardinalityType{5}, EstimationSource::Sampling});
|
||||
samplingEstimators[bNss] = std::move(bSamplingEstimator);
|
||||
|
||||
BaseTableCardinalityMap tableCards;
|
||||
tableCards.emplace(aNss, CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling});
|
||||
// Ensure "b" collection has smaller CE
|
||||
tableCards.emplace(bNss, CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling});
|
||||
|
||||
JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators, tableCards};
|
||||
JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators};
|
||||
|
||||
auto selEst = predEstimator.joinPredicateSel(graph.getEdge(0));
|
||||
// The selectivity estimate comes from 1 / NDV(B.foo) = 1 / 5 = 0.2
|
||||
|
|
@ -414,8 +409,9 @@ TEST_F(JoinPredicateEstimatorFixture, NDVCompoundJoinKey) {
|
|||
graph.addSimpleEqualityEdge(aNodeId, bNodeId, 2, 3);
|
||||
|
||||
SamplingEstimatorMap samplingEstimators;
|
||||
auto aSamplingEstimator = std::make_unique<FakeNdvEstimator>();
|
||||
// We shoudl end up using the NDV from (foo, bar) and not from foo or bar.
|
||||
auto aSamplingEstimator = std::make_unique<FakeNdvEstimator>(
|
||||
CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling});
|
||||
// We should end up using the NDV from (foo, bar) and not from foo or bar.
|
||||
aSamplingEstimator->addFakeNDVEstimate(
|
||||
{FieldPath("foo"), FieldPath("bar")},
|
||||
CardinalityEstimate{CardinalityType{5}, EstimationSource::Sampling});
|
||||
|
|
@ -424,13 +420,10 @@ TEST_F(JoinPredicateEstimatorFixture, NDVCompoundJoinKey) {
|
|||
aSamplingEstimator->addFakeNDVEstimate(
|
||||
{FieldPath("bar")}, CardinalityEstimate{CardinalityType{3}, EstimationSource::Sampling});
|
||||
samplingEstimators[aNss] = std::move(aSamplingEstimator);
|
||||
samplingEstimators[bNss] = std::make_unique<FakeNdvEstimator>();
|
||||
samplingEstimators[bNss] = std::make_unique<FakeNdvEstimator>(
|
||||
CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling});
|
||||
|
||||
BaseTableCardinalityMap tableCards;
|
||||
tableCards.emplace(aNss, CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling});
|
||||
tableCards.emplace(bNss, CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling});
|
||||
|
||||
JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators, tableCards};
|
||||
JoinPredicateEstimator predEstimator{graph, paths, samplingEstimators};
|
||||
|
||||
auto selEst = predEstimator.joinPredicateSel(graph.getEdge(0));
|
||||
// The selectivity estimate comes from 1 / NDV(A.foo, A.bar) = 1 / 5 = 0.2
|
||||
|
|
|
|||
|
|
@ -73,6 +73,8 @@ using namespace cost_based_ranker;
|
|||
*/
|
||||
class FakeNdvEstimator : public ce::SamplingEstimator {
|
||||
public:
|
||||
/**
 * Constructs the fake estimator, storing 'collCard' in '_collCard' as its collection cardinality.
 */
FakeNdvEstimator(CardinalityEstimate collCard) : _collCard(collCard) {};
|
||||
|
||||
/**
 * Not implemented by this fake: the body consists solely of MONGO_UNREACHABLE, so this overload
 * is not expected to be called.
 */
CardinalityEstimate estimateCardinality(const MatchExpression* expr) const override {
|
||||
MONGO_UNREACHABLE;
|
||||
}
|
||||
|
|
@ -115,7 +117,12 @@ public:
|
|||
return _fakeEstimates.at(fieldNames);
|
||||
}
|
||||
|
||||
/**
 * Returns the collection cardinality stored in '_collCard', converted to a double via toDouble().
 */
double getCollCard() const override {
|
||||
return _collCard.toDouble();
|
||||
}
|
||||
|
||||
private:
|
||||
CardinalityEstimate _collCard;
|
||||
stdx::unordered_map<std::vector<FieldPath>, CardinalityEstimate> _fakeEstimates;
|
||||
};
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue