mirror of https://github.com/mongodb/mongo
SERVER-115146: Estimate cardinality of join subsets (#45201)
GitOrigin-RevId: a9d039904dea9728bc76b19f6ffabd9aba0667d9
This commit is contained in:
parent
8041ad4686
commit
514b7bbae0
|
|
@ -30,6 +30,7 @@
|
||||||
|
|
||||||
#include "mongo/db/query/compiler/optimizer/join/cardinality_estimator.h"
|
#include "mongo/db/query/compiler/optimizer/join/cardinality_estimator.h"
|
||||||
|
|
||||||
|
#include "mongo/db/query/util/bitset_util.h"
|
||||||
#include "mongo/util/assert_util.h"
|
#include "mongo/util/assert_util.h"
|
||||||
|
|
||||||
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQueryCE
|
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQueryCE
|
||||||
|
|
@ -43,11 +44,11 @@ JoinCardinalityEstimator::JoinCardinalityEstimator(EdgeSelectivities edgeSelecti
|
||||||
|
|
||||||
JoinCardinalityEstimator JoinCardinalityEstimator::make(
|
JoinCardinalityEstimator JoinCardinalityEstimator::make(
|
||||||
const JoinReorderingContext& ctx,
|
const JoinReorderingContext& ctx,
|
||||||
const SingleTableAccessPlansResult& singleTablePlansRes,
|
const cost_based_ranker::EstimateMap& estimates,
|
||||||
const SamplingEstimatorMap& samplingEstimators) {
|
const SamplingEstimatorMap& samplingEstimators) {
|
||||||
return JoinCardinalityEstimator(
|
return JoinCardinalityEstimator(
|
||||||
JoinCardinalityEstimator::estimateEdgeSelectivities(ctx, samplingEstimators),
|
JoinCardinalityEstimator::estimateEdgeSelectivities(ctx, samplingEstimators),
|
||||||
JoinCardinalityEstimator::extractNodeCardinalities(ctx, singleTablePlansRes));
|
JoinCardinalityEstimator::extractNodeCardinalities(ctx, estimates));
|
||||||
}
|
}
|
||||||
|
|
||||||
EdgeSelectivities JoinCardinalityEstimator::estimateEdgeSelectivities(
|
EdgeSelectivities JoinCardinalityEstimator::estimateEdgeSelectivities(
|
||||||
|
|
@ -63,13 +64,16 @@ EdgeSelectivities JoinCardinalityEstimator::estimateEdgeSelectivities(
|
||||||
}
|
}
|
||||||
|
|
||||||
NodeCardinalities JoinCardinalityEstimator::extractNodeCardinalities(
|
NodeCardinalities JoinCardinalityEstimator::extractNodeCardinalities(
|
||||||
const JoinReorderingContext& ctx, const SingleTableAccessPlansResult& singleTablePlansRes) {
|
const JoinReorderingContext& ctx, const cost_based_ranker::EstimateMap& estimates) {
|
||||||
NodeCardinalities nodeCardinalities;
|
NodeCardinalities nodeCardinalities;
|
||||||
nodeCardinalities.reserve(ctx.joinGraph.numNodes());
|
nodeCardinalities.reserve(ctx.joinGraph.numNodes());
|
||||||
for (size_t nodeId = 0; nodeId < ctx.joinGraph.numNodes(); nodeId++) {
|
for (size_t nodeId = 0; nodeId < ctx.joinGraph.numNodes(); nodeId++) {
|
||||||
auto* cq = ctx.joinGraph.accessPathAt(nodeId);
|
auto* cq = ctx.joinGraph.accessPathAt(nodeId);
|
||||||
auto cbrRes = singleTablePlansRes.estimate.at(singleTablePlansRes.solns.at(cq)->root());
|
auto qsn = ctx.cbrCqQsns.find(cq);
|
||||||
nodeCardinalities.push_back(cbrRes.outCE);
|
tassert(11514600, "Missing QSN for CanonicalQuery", qsn != ctx.cbrCqQsns.end());
|
||||||
|
auto cbrRes = estimates.find(qsn->second->root());
|
||||||
|
tassert(11514601, "Missing estimate for QSN root", cbrRes != estimates.end());
|
||||||
|
nodeCardinalities.push_back(cbrRes->second.outCE);
|
||||||
}
|
}
|
||||||
return nodeCardinalities;
|
return nodeCardinalities;
|
||||||
}
|
}
|
||||||
|
|
@ -166,4 +170,58 @@ cost_based_ranker::SelectivityEstimate JoinCardinalityEstimator::joinPredicateSe
|
||||||
"selectivityEstimate"_attr = res);
|
"selectivityEstimate"_attr = res);
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cost_based_ranker::CardinalityEstimate JoinCardinalityEstimator::getOrEstimateSubsetCardinality(
|
||||||
|
const JoinReorderingContext& ctx, const NodeSet& nodes) {
|
||||||
|
if (auto it = _subsetCardinalities.find(nodes); it != _subsetCardinalities.end()) {
|
||||||
|
return it->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
// This method assumes that all predicates (join and single-table) are independent from each
|
||||||
|
// other, allowing us to combine them with simple multiplication below.
|
||||||
|
//
|
||||||
|
// '_edgeSelectivities' contains edge selectivities: for a given edge connecting tables U and V, it is
|
||||||
|
// the fraction of rows that are output by the U-V join over the total number of row
|
||||||
|
// combinations between U and V (|U| * |V|). The number of rows in the U-V output is by
|
||||||
|
// definition this selectivity multiplied by |U| and |V|.
|
||||||
|
//
|
||||||
|
// We extend this logic to more tables using the independence assumption. For example, given the
|
||||||
|
// result of the U-V join, assume we are further joining with W through a V-W edge. We treat the
|
||||||
|
// intermediate result as a single "table" with its own cardinality. We apply the selectivity of
|
||||||
|
// the V-W edge to estimate how many rows from the intermediate result match rows in W, and
|
||||||
|
// finally multiply by |W| to account for the number of rows in W that participate in the join.
|
||||||
|
//
|
||||||
|
// So far, we have the product of all base table cardinalities with the selectivities of the
|
||||||
|
// edges in the graph induced by 'nodes'. We must also include the selectivities of single-table
|
||||||
|
// predicates. By the independence assumption, these can simply be multiplied with the product.
|
||||||
|
//
|
||||||
|
// One final complication involves cycles. If all selectivities from edges in a cycle are
|
||||||
|
// included in the estimate, we will double-count some join predicates. We remove cycles below
|
||||||
|
// by building a spanning tree (or forest) from the edges considered.
|
||||||
|
//
|
||||||
|
// Therefore, this method takes the following steps: Induce a subgraph involving only the nodes
|
||||||
|
// in 'nodes', and reduce the edges in that subgraph to remove cycles. Then, multiply:
|
||||||
|
// (1) The selectivities from the reduced edge list.
|
||||||
|
// (2) The base table cardinalities.
|
||||||
|
// (3) The single-table predicate selectivities.
|
||||||
|
// Finally, note that we have the pre-computed combination of (2) and (3) in '_nodeCardinalities'.
|
||||||
|
cost_based_ranker::CardinalityEstimate ce = cost_based_ranker::oneCE;
|
||||||
|
for (auto nodeIdx : iterable(nodes, ctx.joinGraph.numNodes())) {
|
||||||
|
ce = ce * _nodeCardinalities[nodeIdx].toDouble();
|
||||||
|
}
|
||||||
|
// TODO SERVER-115559: Invoke cycle breaker over these edges.
|
||||||
|
std::vector<EdgeId> edges = ctx.joinGraph.getEdgesForSubgraph(nodes);
|
||||||
|
for (const auto& edgeId : edges) {
|
||||||
|
ce = ce * _edgeSelectivities.at(edgeId);
|
||||||
|
}
|
||||||
|
|
||||||
|
LOGV2_DEBUG(11514603,
|
||||||
|
5,
|
||||||
|
"Estimating cardinality for subset",
|
||||||
|
"subset"_attr = nodes.to_string(),
|
||||||
|
"cardinalityEstimate"_attr = ce);
|
||||||
|
|
||||||
|
_subsetCardinalities.emplace(nodes, ce);
|
||||||
|
return ce;
|
||||||
|
}
|
||||||
} // namespace mongo::join_ordering
|
} // namespace mongo::join_ordering
|
||||||
|
|
|
||||||
|
|
@ -46,6 +46,11 @@ using NodeCardinalities = std::vector<cost_based_ranker::CardinalityEstimate>;
|
||||||
*/
|
*/
|
||||||
using EdgeSelectivities = std::vector<cost_based_ranker::SelectivityEstimate>;
|
using EdgeSelectivities = std::vector<cost_based_ranker::SelectivityEstimate>;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tracks for each JoinSubset (represented by a NodeSet) the estimated cardinality of the join.
|
||||||
|
*/
|
||||||
|
using SubsetCardinalities = absl::flat_hash_map<NodeSet, cost_based_ranker::CardinalityEstimate>;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Contains logic necessary to do selectivity and cardinality estimation for joins.
|
* Contains logic necessary to do selectivity and cardinality estimation for joins.
|
||||||
*/
|
*/
|
||||||
|
|
@ -55,7 +60,7 @@ public:
|
||||||
NodeCardinalities nodeCardinalities);
|
NodeCardinalities nodeCardinalities);
|
||||||
|
|
||||||
static JoinCardinalityEstimator make(const JoinReorderingContext& ctx,
|
static JoinCardinalityEstimator make(const JoinReorderingContext& ctx,
|
||||||
const SingleTableAccessPlansResult& singleTablePlansRes,
|
const cost_based_ranker::EstimateMap& estimates,
|
||||||
const SamplingEstimatorMap& samplingEstimators);
|
const SamplingEstimatorMap& samplingEstimators);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -70,10 +75,22 @@ public:
|
||||||
const JoinReorderingContext& ctx, const SamplingEstimatorMap& samplingEstimators);
|
const JoinReorderingContext& ctx, const SamplingEstimatorMap& samplingEstimators);
|
||||||
|
|
||||||
static NodeCardinalities extractNodeCardinalities(
|
static NodeCardinalities extractNodeCardinalities(
|
||||||
const JoinReorderingContext& ctx, const SingleTableAccessPlansResult& singleTablePlansRes);
|
const JoinReorderingContext& ctx, const cost_based_ranker::EstimateMap& estimates);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Estimates the cardinality of a join plan over the given subset of nodes. This method
|
||||||
|
* constructs a spanning tree from the edges in the graph induced by 'nodes', and combines the
|
||||||
|
* edge selectivities, base table cardinalities, and single-table predicate selectivities to
|
||||||
|
* produce an estimate. Populates `_subsetCardinalities` with the result.
|
||||||
|
*/
|
||||||
|
cost_based_ranker::CardinalityEstimate getOrEstimateSubsetCardinality(
|
||||||
|
const JoinReorderingContext& ctx, const NodeSet& nodes);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
EdgeSelectivities _edgeSelectivities;
|
const EdgeSelectivities _edgeSelectivities;
|
||||||
NodeCardinalities _nodeCardinalities;
|
const NodeCardinalities _nodeCardinalities;
|
||||||
|
|
||||||
|
// Populated over the course of subset enumeration.
|
||||||
|
SubsetCardinalities _subsetCardinalities;
|
||||||
};
|
};
|
||||||
} // namespace mongo::join_ordering
|
} // namespace mongo::join_ordering
|
||||||
|
|
|
||||||
|
|
@ -181,22 +181,187 @@ TEST_F(JoinPredicateEstimatorFixture, ExtractNodeCardinalities) {
|
||||||
const auto aCE = CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling};
|
const auto aCE = CardinalityEstimate{CardinalityType{10}, EstimationSource::Sampling};
|
||||||
const auto bCE = CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling};
|
const auto bCE = CardinalityEstimate{CardinalityType{20}, EstimationSource::Sampling};
|
||||||
|
|
||||||
SingleTableAccessPlansResult singleTablePlansRes;
|
cost_based_ranker::EstimateMap estimates;
|
||||||
{
|
{
|
||||||
auto aPlan = makeCollScanPlan(aNss);
|
auto aPlan = makeCollScanPlan(aNss);
|
||||||
singleTablePlansRes.estimate[aPlan->root()] = {inCE, aCE};
|
estimates[aPlan->root()] = {inCE, aCE};
|
||||||
singleTablePlansRes.solns[graph.getNode(aNodeId).accessPath.get()] = std::move(aPlan);
|
ctx.cbrCqQsns[graph.getNode(aNodeId).accessPath.get()] = std::move(aPlan);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
auto bPlan = makeCollScanPlan(bNss);
|
auto bPlan = makeCollScanPlan(bNss);
|
||||||
singleTablePlansRes.estimate[bPlan->root()] = {inCE, bCE};
|
estimates[bPlan->root()] = {inCE, bCE};
|
||||||
singleTablePlansRes.solns[graph.getNode(bNodeId).accessPath.get()] = std::move(bPlan);
|
ctx.cbrCqQsns[graph.getNode(bNodeId).accessPath.get()] = std::move(bPlan);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto nodeCardinalities =
|
auto nodeCardinalities = JoinCardinalityEstimator::extractNodeCardinalities(ctx, estimates);
|
||||||
JoinCardinalityEstimator::extractNodeCardinalities(ctx, singleTablePlansRes);
|
|
||||||
ASSERT_EQ(2U, nodeCardinalities.size());
|
ASSERT_EQ(2U, nodeCardinalities.size());
|
||||||
ASSERT_EQ(aCE, nodeCardinalities[aNodeId]);
|
ASSERT_EQ(aCE, nodeCardinalities[aNodeId]);
|
||||||
ASSERT_EQ(bCE, nodeCardinalities[bNodeId]);
|
ASSERT_EQ(bCE, nodeCardinalities[bNodeId]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
void pushNNodes(JoinGraph& graph, size_t n) {
|
||||||
|
for (size_t i = 0; i < n; i++) {
|
||||||
|
auto nss =
|
||||||
|
NamespaceString::createNamespaceString_forTest("test", str::stream() << "nss" << i);
|
||||||
|
graph.addNode(nss, nullptr, boost::none);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
TEST_F(JoinPredicateEstimatorFixture, EstimateSubsetCardinality) {
|
||||||
|
// Construct 6 nodes, with single-table CEs that are multiples of 10. Node 0 will be ignored in
|
||||||
|
// the rest of the test; it is only there for easy math.
|
||||||
|
size_t numNodes = 6;
|
||||||
|
pushNNodes(graph, numNodes);
|
||||||
|
NodeCardinalities nodeCEs{oneCE, oneCE * 10, oneCE * 20, oneCE * 30, oneCE * 40, oneCE * 50};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct a graph like so
|
||||||
|
* 0 -- 1 -- 2 -- 3
|
||||||
|
* / \
|
||||||
|
* 4 -- 5
|
||||||
|
* With edge selectivities that are multiples of 0.1.
|
||||||
|
* Note: There is one cycle here between nodes 3, 4, and 5. There are no other cycles (implicit
|
||||||
|
* or explicit).
|
||||||
|
*/
|
||||||
|
graph.addSimpleEqualityEdge(NodeId(0), NodeId(1), 0, 1);
|
||||||
|
graph.addSimpleEqualityEdge(NodeId(1), NodeId(2), 2, 3);
|
||||||
|
graph.addSimpleEqualityEdge(NodeId(2), NodeId(3), 4, 5);
|
||||||
|
graph.addSimpleEqualityEdge(NodeId(3), NodeId(4), 6, 7);
|
||||||
|
graph.addSimpleEqualityEdge(NodeId(4), NodeId(5), 7, 8);
|
||||||
|
graph.addSimpleEqualityEdge(NodeId(3), NodeId(5), 6, 8);
|
||||||
|
|
||||||
|
EdgeSelectivities edgeSels;
|
||||||
|
for (size_t i = 0; i < numNodes; i++) {
|
||||||
|
edgeSels.push_back(cost_based_ranker::SelectivityEstimate(SelectivityType(i * 0.1),
|
||||||
|
EstimationSource::Sampling));
|
||||||
|
}
|
||||||
|
|
||||||
|
JoinCardinalityEstimator jce(edgeSels, nodeCEs);
|
||||||
|
{
|
||||||
|
// Cardinality for subset of size 1 is pulled directly from the CE map.
|
||||||
|
ASSERT_EQ(oneCE * 10, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1)));
|
||||||
|
ASSERT_EQ(oneCE * 20, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(2)));
|
||||||
|
ASSERT_EQ(oneCE * 30, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(3)));
|
||||||
|
ASSERT_EQ(oneCE * 40, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(4)));
|
||||||
|
ASSERT_EQ(oneCE * 50, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(5)));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
// Connected sub-graph cardinality is a combo of single-table CEs and edge selectivities.
|
||||||
|
ASSERT_EQ(oneCE * 10 * 20 * 0.1,
|
||||||
|
jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1, 2)));
|
||||||
|
ASSERT_EQ(oneCE * 30 * 40 * 0.3,
|
||||||
|
jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(3, 4)));
|
||||||
|
|
||||||
|
ASSERT_EQ(oneCE * 10 * 20 * 30 * 0.1 * 0.2,
|
||||||
|
jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1, 2, 3)));
|
||||||
|
ASSERT_EQ(oneCE * 20 * 30 * 40 * 0.2 * 0.3,
|
||||||
|
jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(2, 3, 4)));
|
||||||
|
|
||||||
|
ASSERT_EQ(oneCE * 10 * 20 * 30 * 50 * 0.1 * 0.2 * 0.5,
|
||||||
|
jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1, 2, 3, 5)));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// Disconnected sub-graph cardinality includes some cross-products.
|
||||||
|
ASSERT_EQ(oneCE * 20 * 40, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(2, 4)));
|
||||||
|
ASSERT_EQ(oneCE * 10 * 30 * 40 * 0.3,
|
||||||
|
jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1, 3, 4)));
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
// TODO SERVER-115559: Adjust the assertions made here when we implement cycle breaking.
|
||||||
|
// Cycle cardinality estimation should not involve all edges in the cycle.
|
||||||
|
ASSERT_EQ(oneCE * 30 * 40 * 50 * 0.3 * 0.4 * 0.5,
|
||||||
|
jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(3, 4, 5)));
|
||||||
|
|
||||||
|
ASSERT_EQ(oneCE * 10 * 20 * 30 * 40 * 50 * 0.1 * 0.2 * 0.3 * 0.4 * 0.5,
|
||||||
|
jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1, 2, 3, 4, 5)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Similar to the test above, but verifies that path IDs are considered when determining the
|
||||||
|
// presence of cycles.
|
||||||
|
TEST_F(JoinPredicateEstimatorFixture, EstimateSubsetCardinalityAlmostCycle) {
|
||||||
|
size_t numNodes = 4;
|
||||||
|
pushNNodes(graph, numNodes);
|
||||||
|
NodeCardinalities nodeCEs{oneCE, oneCE * 10, oneCE * 20, oneCE * 30};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct a graph like so
|
||||||
|
* 0 -- 1
|
||||||
|
* / \
|
||||||
|
* 2 -- 3
|
||||||
|
* Note: There is NO cycle here, because the path IDs chosen for the edges are different.
|
||||||
|
*/
|
||||||
|
graph.addSimpleEqualityEdge(NodeId(0), NodeId(1), 0, 1);
|
||||||
|
graph.addSimpleEqualityEdge(NodeId(1), NodeId(2), 2, 3);
|
||||||
|
graph.addSimpleEqualityEdge(NodeId(2), NodeId(3), 4, 5);
|
||||||
|
graph.addSimpleEqualityEdge(NodeId(1), NodeId(3), 6, 7);
|
||||||
|
|
||||||
|
EdgeSelectivities edgeSels;
|
||||||
|
for (size_t i = 0; i < numNodes; i++) {
|
||||||
|
edgeSels.push_back(cost_based_ranker::SelectivityEstimate(SelectivityType(i * 0.1),
|
||||||
|
EstimationSource::Sampling));
|
||||||
|
}
|
||||||
|
|
||||||
|
JoinCardinalityEstimator jce(edgeSels, nodeCEs);
|
||||||
|
ASSERT_EQ(oneCE * 10 * 20 * 30 * 0.1 * 0.2 * 0.3,
|
||||||
|
jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1, 2, 3)));
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(JoinPredicateEstimatorFixture, EstimateSubsetCardinalitySameCollectionPresentTwice) {
|
||||||
|
auto nssOne = NamespaceString::createNamespaceString_forTest("test", str::stream() << "nssOne");
|
||||||
|
auto nssTwo = NamespaceString::createNamespaceString_forTest("test", str::stream() << "nssTwo");
|
||||||
|
|
||||||
|
std::string fieldNameA = str::stream() << "a" << 0;
|
||||||
|
auto filterBSONA = BSON(fieldNameA << BSON("$gt" << 0));
|
||||||
|
|
||||||
|
std::string fieldNameB = str::stream() << "b" << 0;
|
||||||
|
auto filterBSONB = BSON(fieldNameB << BSON("$gt" << 0));
|
||||||
|
|
||||||
|
// The first reference to nssOne has a filter on field "a".
|
||||||
|
auto cqA = makeCanonicalQuery(nssOne, filterBSONA);
|
||||||
|
ASSERT_TRUE(graph.addNode(nssOne, std::move(cqA), boost::none).has_value());
|
||||||
|
|
||||||
|
// The second reference to nssOne has a filter on field "b". This node will have larger CE.
|
||||||
|
auto cqB = makeCanonicalQuery(nssOne, filterBSONB);
|
||||||
|
ASSERT_TRUE(graph.addNode(nssOne, std::move(cqB), boost::none).has_value());
|
||||||
|
|
||||||
|
// Finally, there is a node in between for nssTwo.
|
||||||
|
auto cqNssTwo = makeCanonicalQuery(nssTwo, filterBSONA);
|
||||||
|
ASSERT_TRUE(graph.addNode(nssTwo, std::move(cqNssTwo), boost::none).has_value());
|
||||||
|
|
||||||
|
// Finalize graph:
|
||||||
|
// 0 1
|
||||||
|
// \ /
|
||||||
|
// 2
|
||||||
|
graph.addSimpleEqualityEdge(NodeId(0), NodeId(2), 0, 1);
|
||||||
|
graph.addSimpleEqualityEdge(NodeId(1), NodeId(2), 2, 3);
|
||||||
|
EdgeSelectivities edgeSels;
|
||||||
|
for (size_t i = 0; i < 2; i++) {
|
||||||
|
edgeSels.push_back(cost_based_ranker::SelectivityEstimate(SelectivityType((i + 1) * 0.1),
|
||||||
|
EstimationSource::Sampling));
|
||||||
|
}
|
||||||
|
NodeCardinalities nodeCEs{
|
||||||
|
oneCE * 10,
|
||||||
|
oneCE * 20,
|
||||||
|
oneCE * 30,
|
||||||
|
};
|
||||||
|
JoinCardinalityEstimator jce(edgeSels, nodeCEs);
|
||||||
|
|
||||||
|
// Show that even though the namespace is the same for two of the nodes, we are able to
|
||||||
|
// correctly associate CE with the particular filters associated with those nodes.
|
||||||
|
ASSERT_EQ(oneCE * 10, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(0)));
|
||||||
|
ASSERT_EQ(oneCE * 20, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1)));
|
||||||
|
ASSERT_EQ(oneCE * 30, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(2)));
|
||||||
|
|
||||||
|
ASSERT_EQ(oneCE * 10 * 20, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(0, 1)));
|
||||||
|
ASSERT_EQ(oneCE * 10 * 30 * 0.1, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(0, 2)));
|
||||||
|
ASSERT_EQ(oneCE * 20 * 30 * 0.2, jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(1, 2)));
|
||||||
|
|
||||||
|
ASSERT_EQ(oneCE * 10 * 20 * 30 * 0.1 * 0.2,
|
||||||
|
jce.getOrEstimateSubsetCardinality(jCtx, makeNodeSet(0, 1, 2)));
|
||||||
|
}
|
||||||
} // namespace mongo::join_ordering
|
} // namespace mongo::join_ordering
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@
|
||||||
#include "mongo/db/pipeline/document_source.h"
|
#include "mongo/db/pipeline/document_source.h"
|
||||||
#include "mongo/db/pipeline/document_source_lookup.h"
|
#include "mongo/db/pipeline/document_source_lookup.h"
|
||||||
#include "mongo/db/query/compiler/optimizer/join/agg_join_model.h"
|
#include "mongo/db/query/compiler/optimizer/join/agg_join_model.h"
|
||||||
|
#include "mongo/db/query/compiler/optimizer/join/cardinality_estimator.h"
|
||||||
#include "mongo/db/query/compiler/optimizer/join/join_reordering_context.h"
|
#include "mongo/db/query/compiler/optimizer/join/join_reordering_context.h"
|
||||||
#include "mongo/db/query/compiler/optimizer/join/reorder_joins.h"
|
#include "mongo/db/query/compiler/optimizer/join/reorder_joins.h"
|
||||||
#include "mongo/db/query/compiler/optimizer/join/single_table_access.h"
|
#include "mongo/db/query/compiler/optimizer/join/single_table_access.h"
|
||||||
|
|
@ -213,11 +214,14 @@ StatusWith<JoinReorderedExecutorResult> getJoinReorderedExecutor(
|
||||||
|
|
||||||
ReorderedJoinSolution reordered;
|
ReorderedJoinSolution reordered;
|
||||||
switch (qkc.getJoinReorderMode()) {
|
switch (qkc.getJoinReorderMode()) {
|
||||||
case JoinReorderModeEnum::kBottomUp:
|
case JoinReorderModeEnum::kBottomUp: {
|
||||||
// Optimize join order using bottom-up Selinger-style algorithm.
|
// Optimize join order using bottom-up Selinger-style algorithm.
|
||||||
reordered = constructSolutionBottomUp(std::move(ctx),
|
JoinCardinalityEstimator estimator = JoinCardinalityEstimator::make(
|
||||||
getPlanTreeShape(qkc.getJoinPlanTreeShape()));
|
ctx, swAccessPlans.getValue().estimate, samplingEstimators);
|
||||||
|
reordered = constructSolutionBottomUp(
|
||||||
|
std::move(ctx), std::move(estimator), getPlanTreeShape(qkc.getJoinPlanTreeShape()));
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
case JoinReorderModeEnum::kRandom:
|
case JoinReorderModeEnum::kRandom:
|
||||||
// Randomly reorder joins.
|
// Randomly reorder joins.
|
||||||
reordered =
|
reordered =
|
||||||
|
|
|
||||||
|
|
@ -70,7 +70,7 @@ bool PlanEnumeratorContext::canPlanBeEnumerated(PlanTreeShape type,
|
||||||
|
|
||||||
case PlanTreeShape::ZIG_ZAG:
|
case PlanTreeShape::ZIG_ZAG:
|
||||||
// We create a zig-zag plan by alternating which side we add a "base" join subset to.
|
// We create a zig-zag plan by alternating which side we add a "base" join subset to.
|
||||||
// TODO SERVER-113059: Pick based on which side has smaller CE.
|
// TODO SERVER-115147: Pick based on which side has smaller CE.
|
||||||
if (left.isBaseCollectionAccess() && right.isBaseCollectionAccess()) {
|
if (left.isBaseCollectionAccess() && right.isBaseCollectionAccess()) {
|
||||||
/**
|
/**
|
||||||
* We always allow a join like this as a base case:
|
* We always allow a join like this as a base case:
|
||||||
|
|
@ -162,7 +162,6 @@ void PlanEnumeratorContext::addJoinPlan(PlanTreeShape type,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// TODO SERVER-113059: Rudimentary cost metric/tracking.
|
|
||||||
subset.plans.push_back(
|
subset.plans.push_back(
|
||||||
_registry.registerJoinNode(subset, method, left.bestPlan(), right.bestPlan()));
|
_registry.registerJoinNode(subset, method, left.bestPlan(), right.bestPlan()));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "mongo/db/query/compiler/optimizer/join/cardinality_estimator.h"
|
||||||
#include "mongo/db/query/compiler/optimizer/join/join_plan.h"
|
#include "mongo/db/query/compiler/optimizer/join/join_plan.h"
|
||||||
#include "mongo/db/query/compiler/optimizer/join/join_reordering_context.h"
|
#include "mongo/db/query/compiler/optimizer/join/join_reordering_context.h"
|
||||||
#include "mongo/util/modules.h"
|
#include "mongo/util/modules.h"
|
||||||
|
|
@ -46,7 +47,9 @@ enum class PlanTreeShape { LEFT_DEEP, RIGHT_DEEP, ZIG_ZAG };
|
||||||
*/
|
*/
|
||||||
class PlanEnumeratorContext {
|
class PlanEnumeratorContext {
|
||||||
public:
|
public:
|
||||||
PlanEnumeratorContext(const JoinReorderingContext& ctx) : _ctx{ctx} {}
|
PlanEnumeratorContext(const JoinReorderingContext& ctx,
|
||||||
|
const JoinCardinalityEstimator& estimator)
|
||||||
|
: _ctx{ctx}, _estimator(estimator) {}
|
||||||
|
|
||||||
// Delete copy and move operations to prevent issues with copying '_joinGraph'.
|
// Delete copy and move operations to prevent issues with copying '_joinGraph'.
|
||||||
PlanEnumeratorContext(const PlanEnumeratorContext&) = delete;
|
PlanEnumeratorContext(const PlanEnumeratorContext&) = delete;
|
||||||
|
|
@ -115,6 +118,7 @@ private:
|
||||||
const JoinSubset& subset) const;
|
const JoinSubset& subset) const;
|
||||||
|
|
||||||
const JoinReorderingContext& _ctx;
|
const JoinReorderingContext& _ctx;
|
||||||
|
const JoinCardinalityEstimator& _estimator;
|
||||||
|
|
||||||
// Hold intermediate results of the enumeration algorithm. The index into the outer vector
|
// Hold intermediate results of the enumeration algorithm. The index into the outer vector
|
||||||
// represents the "level". The i'th level contains solutions for the optimal way to join all
|
// represents the "level". The i'th level contains solutions for the optimal way to join all
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@
|
||||||
|
|
||||||
#include "mongo/db/query/compiler/optimizer/join/plan_enumerator.h"
|
#include "mongo/db/query/compiler/optimizer/join/plan_enumerator.h"
|
||||||
|
|
||||||
|
#include "mongo/db/query/compiler/optimizer/join/cardinality_estimator.h"
|
||||||
#include "mongo/db/query/compiler/optimizer/join/plan_enumerator_helpers.h"
|
#include "mongo/db/query/compiler/optimizer/join/plan_enumerator_helpers.h"
|
||||||
#include "mongo/db/query/compiler/optimizer/join/unit_test_helpers.h"
|
#include "mongo/db/query/compiler/optimizer/join/unit_test_helpers.h"
|
||||||
#include "mongo/unittest/death_test.h"
|
#include "mongo/unittest/death_test.h"
|
||||||
|
|
@ -119,7 +120,7 @@ public:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PlanEnumeratorContext ctx{jCtx};
|
PlanEnumeratorContext ctx{jCtx, emptyEstimator};
|
||||||
ctx.enumerateJoinSubsets(shape);
|
ctx.enumerateJoinSubsets(shape);
|
||||||
ASSERT_EQ(numNodes, ctx.getSubsets(0).size());
|
ASSERT_EQ(numNodes, ctx.getSubsets(0).size());
|
||||||
for (size_t k = 1; k < numNodes; ++k) {
|
for (size_t k = 1; k < numNodes; ++k) {
|
||||||
|
|
@ -137,6 +138,8 @@ public:
|
||||||
goldenCtx->outStream() << ctx.toString() << std::endl;
|
goldenCtx->outStream() << ctx.toString() << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
JoinCardinalityEstimator emptyEstimator = JoinCardinalityEstimator({}, {});
|
||||||
};
|
};
|
||||||
|
|
||||||
TEST_F(JoinPlanEnumeratorTest, InitializeSubsetsTwo) {
|
TEST_F(JoinPlanEnumeratorTest, InitializeSubsetsTwo) {
|
||||||
|
|
@ -146,7 +149,7 @@ TEST_F(JoinPlanEnumeratorTest, InitializeSubsetsTwo) {
|
||||||
graph.addSimpleEqualityEdge((NodeId)0, (NodeId)1, 0, 1);
|
graph.addSimpleEqualityEdge((NodeId)0, (NodeId)1, 0, 1);
|
||||||
|
|
||||||
{
|
{
|
||||||
PlanEnumeratorContext ctx{jCtx};
|
PlanEnumeratorContext ctx{jCtx, emptyEstimator};
|
||||||
ctx.enumerateJoinSubsets(PlanTreeShape::LEFT_DEEP);
|
ctx.enumerateJoinSubsets(PlanTreeShape::LEFT_DEEP);
|
||||||
|
|
||||||
auto& level0 = ctx.getSubsets(0);
|
auto& level0 = ctx.getSubsets(0);
|
||||||
|
|
@ -163,7 +166,7 @@ TEST_F(JoinPlanEnumeratorTest, InitializeSubsetsTwo) {
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
PlanEnumeratorContext ctx{jCtx};
|
PlanEnumeratorContext ctx{jCtx, emptyEstimator};
|
||||||
ctx.enumerateJoinSubsets(PlanTreeShape::RIGHT_DEEP);
|
ctx.enumerateJoinSubsets(PlanTreeShape::RIGHT_DEEP);
|
||||||
|
|
||||||
auto& level0 = ctx.getSubsets(0);
|
auto& level0 = ctx.getSubsets(0);
|
||||||
|
|
@ -189,7 +192,7 @@ TEST_F(JoinPlanEnumeratorTest, InitializeSubsetsThree) {
|
||||||
graph.addSimpleEqualityEdge(NodeId(1), NodeId(2), 1, 2);
|
graph.addSimpleEqualityEdge(NodeId(1), NodeId(2), 1, 2);
|
||||||
|
|
||||||
{
|
{
|
||||||
PlanEnumeratorContext ctx{jCtx};
|
PlanEnumeratorContext ctx{jCtx, emptyEstimator};
|
||||||
ctx.enumerateJoinSubsets(PlanTreeShape::LEFT_DEEP);
|
ctx.enumerateJoinSubsets(PlanTreeShape::LEFT_DEEP);
|
||||||
|
|
||||||
auto& level0 = ctx.getSubsets(0);
|
auto& level0 = ctx.getSubsets(0);
|
||||||
|
|
@ -213,7 +216,7 @@ TEST_F(JoinPlanEnumeratorTest, InitializeSubsetsThree) {
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
PlanEnumeratorContext ctx{jCtx};
|
PlanEnumeratorContext ctx{jCtx, emptyEstimator};
|
||||||
ctx.enumerateJoinSubsets(PlanTreeShape::RIGHT_DEEP);
|
ctx.enumerateJoinSubsets(PlanTreeShape::RIGHT_DEEP);
|
||||||
|
|
||||||
auto& level0 = ctx.getSubsets(0);
|
auto& level0 = ctx.getSubsets(0);
|
||||||
|
|
@ -245,7 +248,7 @@ TEST_F(JoinPlanEnumeratorTest, InitializeSubsetsThreeNoCycle) {
|
||||||
graph.addSimpleEqualityEdge(NodeId(0), NodeId(2), 0, 2);
|
graph.addSimpleEqualityEdge(NodeId(0), NodeId(2), 0, 2);
|
||||||
|
|
||||||
{
|
{
|
||||||
PlanEnumeratorContext ctx{jCtx};
|
PlanEnumeratorContext ctx{jCtx, emptyEstimator};
|
||||||
ctx.enumerateJoinSubsets(PlanTreeShape::LEFT_DEEP);
|
ctx.enumerateJoinSubsets(PlanTreeShape::LEFT_DEEP);
|
||||||
|
|
||||||
auto& level0 = ctx.getSubsets(0);
|
auto& level0 = ctx.getSubsets(0);
|
||||||
|
|
@ -269,7 +272,7 @@ TEST_F(JoinPlanEnumeratorTest, InitializeSubsetsThreeNoCycle) {
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
{
|
||||||
PlanEnumeratorContext ctx{jCtx};
|
PlanEnumeratorContext ctx{jCtx, emptyEstimator};
|
||||||
ctx.enumerateJoinSubsets(PlanTreeShape::RIGHT_DEEP);
|
ctx.enumerateJoinSubsets(PlanTreeShape::RIGHT_DEEP);
|
||||||
|
|
||||||
auto& level0 = ctx.getSubsets(0);
|
auto& level0 = ctx.getSubsets(0);
|
||||||
|
|
|
||||||
|
|
@ -380,8 +380,9 @@ ReorderedJoinSolution constructSolutionWithRandomOrder(const JoinReorderingConte
|
||||||
}
|
}
|
||||||
|
|
||||||
ReorderedJoinSolution constructSolutionBottomUp(const JoinReorderingContext& ctx,
|
ReorderedJoinSolution constructSolutionBottomUp(const JoinReorderingContext& ctx,
|
||||||
|
JoinCardinalityEstimator estimator,
|
||||||
PlanTreeShape shape) {
|
PlanTreeShape shape) {
|
||||||
PlanEnumeratorContext peCtx(ctx);
|
PlanEnumeratorContext peCtx(ctx, estimator);
|
||||||
|
|
||||||
peCtx.enumerateJoinSubsets(shape);
|
peCtx.enumerateJoinSubsets(shape);
|
||||||
auto bestPlanNodeId = peCtx.getBestFinalPlan();
|
auto bestPlanNodeId = peCtx.getBestFinalPlan();
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@
|
||||||
*/
|
*/
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "mongo/db/query/compiler/optimizer/join/cardinality_estimator.h"
|
||||||
#include "mongo/db/query/compiler/optimizer/join/join_reordering_context.h"
|
#include "mongo/db/query/compiler/optimizer/join/join_reordering_context.h"
|
||||||
#include "mongo/db/query/compiler/optimizer/join/plan_enumerator.h"
|
#include "mongo/db/query/compiler/optimizer/join/plan_enumerator.h"
|
||||||
#include "mongo/util/modules.h"
|
#include "mongo/util/modules.h"
|
||||||
|
|
@ -56,6 +57,7 @@ ReorderedJoinSolution constructSolutionWithRandomOrder(const JoinReorderingConte
|
||||||
* Selinger-style join optimization.
|
* Selinger-style join optimization.
|
||||||
*/
|
*/
|
||||||
ReorderedJoinSolution constructSolutionBottomUp(const JoinReorderingContext& ctx,
|
ReorderedJoinSolution constructSolutionBottomUp(const JoinReorderingContext& ctx,
|
||||||
|
JoinCardinalityEstimator estimator,
|
||||||
PlanTreeShape shape);
|
PlanTreeShape shape);
|
||||||
|
|
||||||
} // namespace mongo::join_ordering
|
} // namespace mongo::join_ordering
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue