From b506d1fe7cc35901d1b900555ea57a4c9863147e Mon Sep 17 00:00:00 2001 From: Alexander Ignatyev Date: Mon, 15 Dec 2025 20:19:45 +0000 Subject: [PATCH] SERVER-115093 Reimplement join graph cycle breaker (#45054) GitOrigin-RevId: 4daf17acd855213f09bec5e5f5bfa7c875189cd7 --- .../query/compiler/optimizer/join/BUILD.bazel | 3 +- .../optimizer/join/adjacency_matrix.cpp | 46 --- .../optimizer/join/adjacency_matrix.h | 49 --- .../optimizer/join/adjacency_matrix_test.cpp | 145 --------- .../optimizer/join/graph_cycle_breaker.cpp | 199 ++++++++---- .../optimizer/join/graph_cycle_breaker.h | 55 ++-- .../join/graph_cycle_breaker_test.cpp | 304 +++++++++++++++++- .../compiler/optimizer/join/join_graph.h | 8 +- .../optimizer/join/plan_enumerator.cpp | 1 - 9 files changed, 470 insertions(+), 340 deletions(-) delete mode 100644 src/mongo/db/query/compiler/optimizer/join/adjacency_matrix.cpp delete mode 100644 src/mongo/db/query/compiler/optimizer/join/adjacency_matrix.h delete mode 100644 src/mongo/db/query/compiler/optimizer/join/adjacency_matrix_test.cpp diff --git a/src/mongo/db/query/compiler/optimizer/join/BUILD.bazel b/src/mongo/db/query/compiler/optimizer/join/BUILD.bazel index 00f9b8b423e..f539b4429df 100644 --- a/src/mongo/db/query/compiler/optimizer/join/BUILD.bazel +++ b/src/mongo/db/query/compiler/optimizer/join/BUILD.bazel @@ -20,7 +20,6 @@ exports_files( mongo_cc_library( name = "join_ordering", srcs = [ - "adjacency_matrix.cpp", "graph_cycle_breaker.cpp", "join_graph.cpp", "path_resolver.cpp", @@ -28,6 +27,7 @@ mongo_cc_library( deps = [ "//src/mongo/db:server_base", "//src/mongo/db/query:canonical_query_base", + "//src/mongo/db/query/compiler/optimizer/cost_based_ranker:estimates", "//src/mongo/db/query/util:query_util_bitset", "//src/mongo/db/query/util:query_util_disjoint_set", ], @@ -59,7 +59,6 @@ mongo_cc_library( mongo_cc_unit_test( name = "join_ordering_test", srcs = [ - "adjacency_matrix_test.cpp", "agg_join_model_test.cpp", "cardinality_estimator_join_test.cpp", "graph_cycle_breaker_test.cpp", diff --git a/src/mongo/db/query/compiler/optimizer/join/adjacency_matrix.cpp b/src/mongo/db/query/compiler/optimizer/join/adjacency_matrix.cpp deleted file mode 100644 index fda0a24b6b6..00000000000 --- a/src/mongo/db/query/compiler/optimizer/join/adjacency_matrix.cpp +++ /dev/null @@ -1,46 +0,0 @@ -/** - * Copyright (C) 2025-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#include "mongo/db/query/compiler/optimizer/join/adjacency_matrix.h" - -#include "mongo/db/query/util/bitset_util.h" - -namespace mongo::join_ordering { -AdjacencyMatrix makeAdjacencyMatrix(const JoinGraph& joinGraph, - const std::vector& subgraph) { - AdjacencyMatrix matrix; - for (auto edgeId : subgraph) { - const auto& edge = joinGraph.getEdge(edgeId); - matrix[edge.left].set(edge.right); - matrix[edge.right].set(edge.left); - } - - return matrix; -} -} // namespace mongo::join_ordering diff --git a/src/mongo/db/query/compiler/optimizer/join/adjacency_matrix.h b/src/mongo/db/query/compiler/optimizer/join/adjacency_matrix.h deleted file mode 100644 index 0119d3068f3..00000000000 --- a/src/mongo/db/query/compiler/optimizer/join/adjacency_matrix.h +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Copyright (C) 2025-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - -#pragma once - -#include "mongo/db/query/compiler/optimizer/join/join_graph.h" -#include "mongo/util/modules.h" - -namespace mongo::join_ordering { -/** - * Adjacency matrix for a join subgraph. it determines connectivity between some nodes 'u' and 'v': - * 'adjacencyMatrix[u][v]' returns 'true' if the nodes are directly connected. - * Since the join graph is undirected, the matrix is always symmetrical: - * 'adjacencyMatrix[u][v] == adjacencyMatrix[v][u]'. - */ -using AdjacencyMatrix = std::array; - -/** - * Builds an AdjacencyMatrix for a specified 'subgraph' within the 'joinGraph'. - */ -AdjacencyMatrix makeAdjacencyMatrix(const JoinGraph& joinGraph, - const std::vector& subgraph); -} // namespace mongo::join_ordering diff --git a/src/mongo/db/query/compiler/optimizer/join/adjacency_matrix_test.cpp b/src/mongo/db/query/compiler/optimizer/join/adjacency_matrix_test.cpp deleted file mode 100644 index 7f78408b8ca..00000000000 --- a/src/mongo/db/query/compiler/optimizer/join/adjacency_matrix_test.cpp +++ /dev/null @@ -1,145 +0,0 @@ -/** - * Copyright (C) 2025-present MongoDB, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the Server Side Public License, version 1, - * as published by MongoDB, Inc. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * Server Side Public License for more details. - * - * You should have received a copy of the Server Side Public License - * along with this program. If not, see - * . - * - * As a special exception, the copyright holders give permission to link the - * code of portions of this program with the OpenSSL library under certain - * conditions as described in each individual source file and distribute - * linked combinations including the program with the OpenSSL library. You - * must comply with the Server Side Public License in all respects for - * all of the code used other than as permitted herein. If you modify file(s) - * with this exception, you may extend this exception to your version of the - * file(s), but you are not obligated to do so. If you do not wish to do so, - * delete this exception statement from your version. If you delete this - * exception statement from all source files in the program, then also delete - * it in the license file. - */ - - -#include "mongo/db/query/compiler/optimizer/join/adjacency_matrix.h" - -#include "mongo/db/query/compiler/optimizer/join/unit_test_helpers.h" -#include "mongo/unittest/unittest.h" - - -namespace mongo::join_ordering { - -namespace { -AdjacencyMatrix makeMatrix(std::vector rows) { - AdjacencyMatrix matrix; - for (size_t i = 0; i < rows.size(); ++i) { - matrix[i] = NodeSet{rows[i].data(), rows[i].size()}; - } - return matrix; -} - -NodeId addNode(JoinGraph& graph) { - return *graph.addNode(makeNSS("test"), nullptr, boost::none); -} - -EdgeId addEdge(JoinGraph& graph, NodeId left, NodeId right) { - return *graph.addSimpleEqualityEdge(left, right, 0, 0); -} -} // namespace - -TEST(AdjacencyMatrixBuilderTest, Smoke) { - JoinGraph graph; - auto node0 = addNode(graph); - auto node1 = addNode(graph); - auto node2 = addNode(graph); - auto node3 = addNode(graph); - auto node4 = addNode(graph); - - auto edge01 = addEdge(graph, node0, node1); - auto edge12 = addEdge(graph, node1, node2); - auto edge23 = addEdge(graph, node2, node3); - auto edge34 = addEdge(graph, node3, node4); - auto edge40 = addEdge(graph, node4, node0); - - // No edges case - { - auto expectedMatrix = makeMatrix({ - "00000", - "00000", - "00000", - "00000", - "00000", - }); - ASSERT_EQ(makeAdjacencyMatrix(graph, {}), expectedMatrix); - } - - // All edges case - { - auto expectedMatrix = makeMatrix({ - "10010", - "00101", - "01010", - "10100", - "01001", - }); - ASSERT_EQ(makeAdjacencyMatrix(graph, {edge01, edge12, edge23, edge34, edge40}), - expectedMatrix); - } - - // 2 edges case - { - auto expectedMatrix = makeMatrix({ - "00010", - "00101", - "00010", - "00000", - "00000", - }); - ASSERT_EQ(makeAdjacencyMatrix(graph, {edge01, edge12}), expectedMatrix); - } - - // 1 edge case - { - auto expectedMatrix = makeMatrix({ - "00000", - "00100", - "00010", - "00000", - "00000", - }); - ASSERT_EQ(makeAdjacencyMatrix(graph, {edge12}), expectedMatrix); - } -} - -TEST(AdjacencyMatrixBuilderTest, FullyConnectedGraph) { - JoinGraph graph; - auto node0 = addNode(graph); - auto node1 = addNode(graph); - auto node2 = addNode(graph); - auto node3 = addNode(graph); - - auto edge01 = addEdge(graph, node0, node1); - auto edge02 = addEdge(graph, node0, node2); - auto edge03 = addEdge(graph, node0, node3); - auto edge12 = addEdge(graph, node1, node2); - auto edge13 = addEdge(graph, node1, node3); - auto edge23 = addEdge(graph, node2, node3); - - auto expectedMatrix = makeMatrix({ - "1110", - "1101", - "1011", - "0111", - }); - - ASSERT_EQ(makeAdjacencyMatrix(graph, {edge01, edge02, edge03, edge12, edge13, edge23}), - expectedMatrix); -} -} // namespace mongo::join_ordering diff --git a/src/mongo/db/query/compiler/optimizer/join/graph_cycle_breaker.cpp b/src/mongo/db/query/compiler/optimizer/join/graph_cycle_breaker.cpp index 2cc8e42c885..a64c9c33279 100644 --- a/src/mongo/db/query/compiler/optimizer/join/graph_cycle_breaker.cpp +++ b/src/mongo/db/query/compiler/optimizer/join/graph_cycle_breaker.cpp @@ -29,12 +29,42 @@ #include "mongo/db/query/compiler/optimizer/join/graph_cycle_breaker.h" -#include "mongo/db/query/util/bitset_util.h" +#include +#include #include namespace mongo::join_ordering { namespace { +/** + * Creates a graph with predicates as edges and fields as nodes from the given 'joinGraph'. The + * parameter 'numNodes' representas the number of nodes in the new graph. It is the duty of the + * caller to make sure that 'numNodes' is big enough. + */ +AdjacencyList makeAdjacencyList(const JoinGraph& joinGraph, size_t numNodes) { + AdjacencyList adjList; + adjList.neighbors.resize(numNodes); + + for (EdgeId edgeId = 0; edgeId != joinGraph.numEdges(); ++edgeId) { + const auto& edge = joinGraph.getEdge(edgeId); + for (uint16_t predIndex = 0; predIndex != static_cast(edge.predicates.size()); + ++predIndex) { + const auto& predicate = edge.predicates[predIndex]; + const PredicateId predicateId = static_cast(adjList.predicates.size()); + + adjList.neighbors[predicate.left].emplace_back(predicate.right, predicateId); + adjList.neighbors[predicate.right].emplace_back(predicate.left, predicateId); + adjList.predicates.emplace_back(edgeId, predIndex); + } + } + + tassert(11509300, + "Too many predicates in thejoin graph", + adjList.predicates.size() < kMaxNumberOfPredicates); + + return adjList; +} + /** * Backtracking with prunings for finding cycles in an undirected graph. */ @@ -49,10 +79,6 @@ public: // Needed to handle duplicates. absl::flat_hash_set cycles; - // 'cleared*' bitsets are used to clear their correponding bitsets. - const Bitset clearedBlocked{_adjList.neighbors.size()}; - const Bitset clearedEdges{_adjList.predicates.size()}; - // This loop tries to find cycles starting and ending with every node. for (size_t start = 0; start != _adjList.neighbors.size(); ++start) { if (_adjList.neighbors[start].size() == 0) { @@ -60,8 +86,9 @@ public: } // Reset the state to prepare a new search. - _blocked &= clearedBlocked; - _edges &= clearedEdges; + _blocked.clear(); + _edges.clear(); + ; findCircuits(static_cast(start), static_cast(start), cycles); } @@ -82,7 +109,7 @@ private: // Consider only nodes which >= start node, since cycles involving nodes < start were // discovered in earlier call of the function. We also don't want to backtrack already // tracked edges. - if (v < start || _edges[predId]) { + if (v < start || _edges.test(predId)) { continue; } @@ -91,7 +118,7 @@ private: // Found a cycle. cycles.insert(_edges); isCycleFound = true; - } else if (!_blocked[v]) { + } else if (!_blocked.test(v)) { if (findCircuits(start, v, cycles)) { isCycleFound = true; } @@ -108,6 +135,31 @@ private: // A node is blocked if it's currently being explored. Bitset _blocked; }; + +/** + * Returns the largest path seen in the graph + 1. + */ +size_t getNumberOfPaths(const JoinGraph& graph) { + boost::optional maxPath; + for (const auto& edge : graph.edges()) { + for (const auto& pred : edge.predicates) { + maxPath = std::max({maxPath.value_or(0), pred.left, pred.right}); + } + } + return maxPath.has_value() ? *maxPath + 1 : 0; +} + +/** + * Returns a bitset, where each bit corresponds to an edge, a bit is set if its correspondong + * edgehas non-compound predicates (< 2). + */ +Bitset getEdgesWithSimplePredicates(const JoinGraph& graph) { + Bitset edgesWithSimplePredicates(graph.numEdges()); + for (EdgeId edgeId = 0; edgeId != graph.numEdges(); ++edgeId) { + edgesWithSimplePredicates.set(edgeId, graph.getEdge(edgeId).predicates.size() < 2); + } + return edgesWithSimplePredicates; +} } // namespace JoinGraphCycles findCycles(AdjacencyList adjList) { @@ -118,75 +170,94 @@ JoinGraphCycles findCycles(AdjacencyList adjList) { .predicates = std::move(adjList.predicates)}; } +GraphCycleBreaker::GraphCycleBreaker(const JoinGraph& graph, + const EdgeSelectivities& edgeSelectivities, + size_t numPaths) + : _numEdges(graph.numEdges()) { + tassert(11509301, + "Edges selectivites are not provided for all edges", + edgeSelectivities.size() >= _numEdges); + + if (numPaths == 0) { + numPaths = getNumberOfPaths(graph); + } else if constexpr (kDebugBuild) { + tassert( + 11509302, "Incorrect number of paths provided", getNumberOfPaths(graph) <= numPaths); + } + + // 1. Create a graph based on predicates as edges and fields (PathId) as nodes. + auto adjList = makeAdjacencyList(graph, numPaths); + // 2. Finds all the cycles in the graph. + auto cycles = findCycles(std::move(adjList)); + + // 3. Find edges which we would prefer to delete in order to break the cycles and store them + // together with the cycles. + const auto edgesWithSimplePredicates = getEdgesWithSimplePredicates(graph); + _cycles.reserve(cycles.cycles.size()); + std::transform( + cycles.cycles.begin(), + cycles.cycles.end(), + std::back_inserter(_cycles), + [this, &cycles, &edgeSelectivities, edgesWithSimplePredicates](const Bitset& cycle) { + auto edges = getEdgeBitset(cycle, cycles.predicates); + auto edge = breakCycle(edges, edgeSelectivities, edgesWithSimplePredicates); + return CycleInfo{.edges = std::move(edges), .edge = edge}; + }); + // 4. Sorting the cycles to make the cycle breaker consistent. + std::sort(_cycles.begin(), _cycles.end()); +} + std::vector GraphCycleBreaker::breakCycles(std::vector subgraph) { - // Performs a cycle detection in undirected graph using DFS. Once a cycle is detected the - // last edge detected edge of the cycle is removed to break the cycle. - // - // clean up - _seen.reset(); - _edgesToRemove.clear(); - std::fill(begin(_parents), end(_parents), _sentinel); + Bitset edgesBitset(_numEdges); + for (auto edgeId : subgraph) { + edgesBitset.set(edgeId); + } - // Calculate adjacency matrix for the subgraph. - const auto adjMatrix = makeAdjacencyMatrix(_graph, subgraph); - - // Traverse the graph to find out cycles and add edges to delete in _edgesToRemove. - for (NodeId nodeId = 0; nodeId != _sentinel; ++nodeId) { - if (!_seen[nodeId]) { - visit(nodeId, adjMatrix, subgraph); + for (const auto& cycle : _cycles) { + // The subgraph contains the current cycle if all the cycle edges are subset of the + // subgraph excluding already deleted nodes. + if (cycle.edges.isSubsetOf(edgesBitset)) { + edgesBitset.set(cycle.edge, false); } } - // Remove edges to break the identified cycles. - auto end = std::remove_if(subgraph.begin(), subgraph.end(), [&](int edgeId) { - return _edgesToRemove.contains(edgeId); - }); + auto end = std::remove_if( + subgraph.begin(), subgraph.end(), [&](int edgeId) { return !edgesBitset.test(edgeId); }); subgraph.erase(end, subgraph.end()); return subgraph; } -void GraphCycleBreaker::visit(NodeId nodeId, - const AdjacencyMatrix& matrix, - const std::vector& edges) { - _seen.set(nodeId); - for (auto otherNodeId : iterable(matrix[nodeId], _graph.numNodes())) { - if (!_seen[otherNodeId]) { - _parents[otherNodeId] = nodeId; - visit(otherNodeId, matrix, edges); - } else if (_parents[nodeId] != otherNodeId) { // check that we don't traverse the edge back - breakCycle(/*currentNode*/ otherNodeId, /*previousNode*/ nodeId, edges); +EdgeId GraphCycleBreaker::breakCycle(const Bitset& cycleBitset, + const EdgeSelectivities& edgeSelectivities, + const Bitset& edgesWithSimplePredicates) const { + // 1. Break a cycle of 3 by removing the middle edge sorted by selectivity. + if (cycleBitset.count() == 3) { + absl::InlinedVector, 3> + selectivities; + for (auto edgeId : makePopulationView(cycleBitset)) { + selectivities.emplace_back(edgeSelectivities.at(edgeId), static_cast(edgeId)); } + std::sort(selectivities.begin(), selectivities.end()); + return selectivities[1].second; } + + // 2. Or break a cycle by selecting a non-compound edge. + if (cycleBitset.intersects(edgesWithSimplePredicates)) { + return (cycleBitset & edgesWithSimplePredicates).findFirst(); + } + + // 3. Or break a cycle by selecting any edge. + return cycleBitset.findFirst(); } -void GraphCycleBreaker::breakCycle(NodeId currentNode, - NodeId previousNode, - const std::vector& edges) { - // In this naive implementation of breaking cycles we just remove the last discovered node. This - // is the simpliest solution and doesn't break '_parents'. However, if we delete a node from the - // middle of the cycle we should: - // * correct the '_parents', so that if another cycle is discovered we can use '_parents' to - // iterate over the cycle; - // * call the 'visit' with the 'currentNode'. - // - // TODO SERVER-114121: We assume here's just one edge which connects these two nodes, - // which is true now. When we start supporting edges which have multiple nodes on one - // side the things will become more complicated. - auto edgeId = findEdgeId(previousNode, currentNode, edges); - tassert(11116500, "The graph edge is expected to exist", edgeId); - _edgesToRemove.insert(*edgeId); -} - -boost::optional GraphCycleBreaker::findEdgeId(NodeId u, - NodeId v, - const std::vector& edges) const { - for (auto edgeId : edges) { - const auto& edge = _graph.getEdge(edgeId); - if ((edge.left == u && edge.right == v) || (edge.left == v && edge.right == u)) { - return edgeId; - } +Bitset GraphCycleBreaker::getEdgeBitset( + const Bitset& predicateBitset, + const std::vector>& predicates) const { + Bitset bitset{_numEdges}; + for (auto index : makePopulationView(predicateBitset)) { + bitset.set(predicates[index].first); } - return boost::none; + return bitset; } } // namespace mongo::join_ordering diff --git a/src/mongo/db/query/compiler/optimizer/join/graph_cycle_breaker.h b/src/mongo/db/query/compiler/optimizer/join/graph_cycle_breaker.h index f6dbf699958..e94b06c2576 100644 --- a/src/mongo/db/query/compiler/optimizer/join/graph_cycle_breaker.h +++ b/src/mongo/db/query/compiler/optimizer/join/graph_cycle_breaker.h @@ -29,7 +29,7 @@ #pragma once -#include "mongo/db/query/compiler/optimizer/join/adjacency_matrix.h" +#include "mongo/db/query/compiler/optimizer/join/cardinality_estimator.h" #include "mongo/db/query/compiler/optimizer/join/join_graph.h" #include "mongo/util/dynamic_bitset.h" #include "mongo/util/modules.h" @@ -75,8 +75,29 @@ JoinGraphCycles findCycles(AdjacencyList adjList); * subgraph to break its cycles. */ class GraphCycleBreaker { + struct CycleInfo { + /** + * Cycle stored as a bitset of edges forming the cycle. + */ + Bitset edges; + + /** + * Edge to delete to break the cycle . + */ + EdgeId edge; + + friend auto operator<=>(const CycleInfo&, const CycleInfo&) = default; + }; + public: - explicit GraphCycleBreaker(const JoinGraph& graph) : _graph(graph) {} + /** + * Initializes new instance of GraphCycleBreaker for the given 'graph'. + * 'edgeSelectivities' is expected to have selectivities for all edges of the graph, + * 'numPaths' is expected to be the number of resolved paths. + */ + explicit GraphCycleBreaker(const JoinGraph& graph, + const EdgeSelectivities& edgeSelectivities, + size_t numPaths = 0); /** * Break cycles for the subgraph specified by the list of edges. It returns new subgraph without @@ -84,26 +105,18 @@ public: */ std::vector breakCycles(std::vector subgraph); + size_t numCycles_forTest() const { + return _cycles.size(); + } + private: - /** - * Implements DFS algorithm to detect graph cycles. - */ - void visit(NodeId nodeId, const AdjacencyMatrix& matrix, const std::vector& edges); + EdgeId breakCycle(const Bitset& cycleBitset, + const EdgeSelectivities& edgeSelectivities, + const Bitset& edgesWithSimplePredicates) const; + Bitset getEdgeBitset(const Bitset& predicateBitset, + const std::vector>& predicates) const; - /** - * Breaks a cycle which is defined by the currentNode and its parent previousNode. - */ - void breakCycle(NodeId currentNode, NodeId previousNode, const std::vector& edges); - - /** - * Find EdgeId that connects two specified nodes u and v. - */ - boost::optional findEdgeId(NodeId u, NodeId v, const std::vector& edges) const; - - const JoinGraph& _graph; - const NodeId _sentinel{kMaxNodesInJoin}; - NodeSet _seen; - std::array _parents; - absl::btree_set _edgesToRemove; + size_t _numEdges; + std::vector _cycles; }; } // namespace mongo::join_ordering diff --git a/src/mongo/db/query/compiler/optimizer/join/graph_cycle_breaker_test.cpp b/src/mongo/db/query/compiler/optimizer/join/graph_cycle_breaker_test.cpp index f84e0b99594..b220fdd0d69 100644 --- a/src/mongo/db/query/compiler/optimizer/join/graph_cycle_breaker_test.cpp +++ b/src/mongo/db/query/compiler/optimizer/join/graph_cycle_breaker_test.cpp @@ -27,7 +27,6 @@ * it in the license file. */ - #include "mongo/db/query/compiler/optimizer/join/graph_cycle_breaker.h" #include "mongo/db/query/compiler/optimizer/join/unit_test_helpers.h" @@ -66,13 +65,25 @@ constexpr size_t cyclesInClique(size_t numNodes) { class GraphCycleBreakerTest : public unittest::Test { public: - GraphCycleBreakerTest() : graph{}, breaker(graph) { + static constexpr EdgeId kMaxEdgeId = 63; + + using PathMap = absl::flat_hash_map; + GraphCycleBreakerTest() : graph{} { a = addNode("a"); b = addNode("b"); c = addNode("c"); d = addNode("d"); e = addNode("e"); f = addNode("f"); + + defaultPaths = {{a, pa}, {b, pb}, {c, pc}, {d, pd}, {e, pe}, {f, pf}}; + alternativePaths = {{a, pa1}, {b, pb1}, {c, pc1}, {d, pd1}, {e, pe1}, {f, pf1}}; + + for (size_t i = 0; i <= kMaxEdgeId; ++i) { + edgeSelectivities.emplace_back( + cost_based_ranker::SelectivityType{static_cast(i + 1) / (2.0 * kMaxEdgeId)}, + cost_based_ranker::EstimationSource::Code); + } } NodeId addNode(StringData collName) { @@ -80,29 +91,144 @@ public: } EdgeId addEdge(NodeId u, NodeId v) { - return *graph.addEdge(u, v, {}); + return addEdge(u, v, defaultPaths); + } + + EdgeId addEdge(NodeId u, NodeId v, PathMap pathMap) { + return *graph.addSimpleEqualityEdge(u, v, pathMap[u], pathMap[v]); + } + + EdgeId addEdge(NodeId u, NodeId v, PathId uPath, PathId vPath) { + return *graph.addSimpleEqualityEdge(u, v, uPath, vPath); + } + + EdgeId addCompoundEdge(NodeId u, NodeId v) { + addEdge(u, v, defaultPaths); + return addEdge(u, v, alternativePaths); + } + + void validateCycles(size_t expectedNumberOfCycles) { + ASSERT_LTE(graph.numEdges(), kMaxEdgeId + 1); + + GraphCycleBreaker breaker{graph, edgeSelectivities}; + ASSERT_EQ(breaker.numCycles_forTest(), expectedNumberOfCycles); } template std::vector breakCycles(EdgeIds... edges) { - return breaker.breakCycles({edges...}); + ASSERT_LTE(graph.numEdges(), kMaxEdgeId + 1); + + GraphCycleBreaker breaker{graph, edgeSelectivities}; + auto newEdges = breaker.breakCycles({edges...}); + return newEdges; } + void setSelectivity(EdgeId edgeId, double newSelectivity) { + edgeSelectivities[edgeId] = cost_based_ranker::SelectivityEstimate( + cost_based_ranker::SelectivityType(newSelectivity), + cost_based_ranker::EstimationSource::Code); + } + + static constexpr PathId p1{1}, p2{2}, pa{11}, pb{12}, pc{13}, pd{14}, pe{15}, pf{16}, pa1{21}, + pb1{22}, pc1{23}, pd1{24}, pe1{25}, pf1{26}, pEnd{27}; + JoinGraph graph; - GraphCycleBreaker breaker; NodeId a, b, c, d, e, f; + absl::flat_hash_map defaultPaths; + absl::flat_hash_map alternativePaths; + EdgeSelectivities edgeSelectivities; }; -TEST_F(GraphCycleBreakerTest, GraphWithCycle) { +/** + * Tests that a middle by selectivity edge is always selected to break cycles. + */ +TEST_F(GraphCycleBreakerTest, MiddleEdge) { + auto edge_ab = addCompoundEdge(a, b); + auto edge_bc = addEdge(b, c); + auto edge_ca = addEdge(c, a); + + // B-C is a middle by selectivity. + setSelectivity(edge_ab, 0.003); + setSelectivity(edge_bc, 0.005); + setSelectivity(edge_ca, 0.007); + + // Case1: B-C is the middle. Selectivities: 0.003, 0.005, 0.007. + std::vector expectedEdges{edge_ab, edge_ca}; + + auto newEdges = breakCycles(edge_ab, edge_bc, edge_ca); + std::sort(newEdges.begin(), newEdges.end()); + ASSERT_EQ(newEdges, expectedEdges); + + // Case 2: C-A is the middle. Selectivities: 0.003, 0.009, 0.007. + // Increase B-C selectivity which makes C-A a new middle. + setSelectivity(edge_bc, 0.009); + expectedEdges = {edge_ab, edge_bc}; + + newEdges = breakCycles(edge_ab, edge_bc, edge_ca); + std::sort(newEdges.begin(), newEdges.end()); + ASSERT_EQ(newEdges, expectedEdges); + + // Case 3: Compound edge A-B removal. Selectivities: 0.008, 0.009, 0.007. + // Increase A-B selectivity which makes A-B a new middle. + setSelectivity(edge_ab, 0.008); + expectedEdges = {edge_bc, edge_ca}; + + newEdges = breakCycles(edge_ab, edge_bc, edge_ca); + std::sort(newEdges.begin(), newEdges.end()); + ASSERT_EQ(newEdges, expectedEdges); +} + +/** + * Tests that a non-compund edge is selected if available to break a cycle of size > 3. + */ +TEST_F(GraphCycleBreakerTest, NoCompoundEdgeSelection) { + auto edge_ab = addCompoundEdge(a, b); + auto edge_bc = addCompoundEdge(b, c); + auto edge_cd = addEdge(c, d); + auto edge_da = addCompoundEdge(d, a); + + // We expected that the only non-compound edge C-D will be removed to break the 4-cycle. + std::vector expectedEdges{edge_ab, edge_bc, edge_da}; + + auto newEdges = breakCycles(edge_ab, edge_bc, edge_cd, edge_da); + std::sort(newEdges.begin(), newEdges.end()); + ASSERT_EQ(newEdges, expectedEdges); +} + +/** + * The cycles still breaks even if all edges are compound. + */ +TEST_F(GraphCycleBreakerTest, AllEdgesAreCompound) { + auto edge_ab = addCompoundEdge(a, b); + auto edge_bc = addCompoundEdge(b, c); + auto edge_cd = addCompoundEdge(c, d); + auto edge_da = addCompoundEdge(d, a); + + auto newEdges = breakCycles(edge_ab, edge_bc, edge_cd, edge_da); + + // One edge was removed to break the cycle. + ASSERT_EQ(newEdges.size(), 3); +} + +TEST_F(GraphCycleBreakerTest, GraphWithComplexCycles) { + // A.a = B.a and B.a = C.a and C.a = D.a and D.a = A.a and C.a = A.a + // Three cycles: + // * A.a - B.a - C.a - D.a - A.a + // * A.a - B.a - C.a - A.a + // * A.a - C.a - D.a - A.a auto edge_ab = addEdge(a, b); auto edge_bc = addEdge(b, c); auto edge_cd = addEdge(c, d); auto edge_da = addEdge(d, a); auto edge_ca = addEdge(c, a); + // This edge is ignored due to its incompatible predicates. + auto edge_db = addEdge(d, b, alternativePaths); - auto newEdges = breakCycles(edge_ab, edge_bc, edge_cd, edge_da, edge_ca); + validateCycles(/*expectedNumberOfCycles*/ 3); + + auto newEdges = breakCycles(edge_ab, edge_bc, edge_cd, edge_da, edge_ca, edge_db); // Two edges are expected to be removed. - ASSERT_EQ(newEdges.size(), 3); + ASSERT_EQ(newEdges.size(), 4); newEdges = breakCycles(edge_ab, edge_bc, edge_ca); // Subgraph case. @@ -110,40 +236,198 @@ TEST_F(GraphCycleBreakerTest, GraphWithCycle) { } TEST_F(GraphCycleBreakerTest, NoCycleGraph) { + // A.a = B.a and B.a = C.a auto edge_ab = addEdge(a, b); auto edge_bc = addEdge(b, c); auto edge_cd = addEdge(c, d); + validateCycles(/*expectedNumberOfCycles*/ 0); + auto newEdges = breakCycles(edge_ab, edge_bc, edge_cd); // No edges are expected to be removed. ASSERT_EQ(newEdges.size(), 3); } TEST_F(GraphCycleBreakerTest, DisconnectedGraph) { + // A.a = B.a and C.a = D.a auto edge_ab = addEdge(a, b); auto edge_cd = addEdge(c, d); + validateCycles(/*expectedNumberOfCycles*/ 0); + auto newEdges = breakCycles(edge_ab, edge_cd); // No edges are expected to be removed. ASSERT_EQ(newEdges.size(), 2); } TEST_F(GraphCycleBreakerTest, DisconnectedGraphWithCycles) { - // A - B - C - A + // A.a - B.a - C.a - A.a auto edge_ab = addEdge(a, b); auto edge_bc = addEdge(b, c); auto edge_ca = addEdge(c, a); - // D - E - F - D + // D.a - E.a - F.a - D.a auto edge_de = addEdge(d, e); auto edge_ef = addEdge(e, f); auto edge_fd = addEdge(f, d); + validateCycles(/*expectedNumberOfCycles*/ 2); + auto newEdges = breakCycles(edge_ab, edge_bc, edge_ca, edge_de, edge_ef, edge_fd); // Two edges are expected to be removed. ASSERT_EQ(newEdges.size(), 4); } +TEST_F(GraphCycleBreakerTest, IncompatiblePredicates) { + // Define a big cycle with compatible predicates: + // A.a = B.a and B.a = C.a and C.a = D.a and D.a = E.a and E.a = F.a and F.a = A.a + auto edge_ab = addEdge(a, b); + auto edge_bc = addEdge(b, c); + auto edge_cd = addEdge(c, d); + auto edge_de = addEdge(d, e); + auto edge_ef = addEdge(e, f); + auto edge_fa = addEdge(f, a); + + // Add edges with incompatible predicates. These edges would add additional cycles in the graph + // if not their predicates: + // C.b = A.b and D.b = A.b and E.b = A.b + auto edge_ca = addEdge(c, a, alternativePaths); + auto edge_da = addEdge(d, a, alternativePaths); + auto edge_ea = addEdge(e, a, alternativePaths); + + validateCycles(/*expectedNumberOfCycles*/ 1); + + // One big loop case + { + auto newEdges = breakCycles( + edge_ab, edge_bc, edge_cd, edge_de, edge_ef, edge_fa, edge_ca, edge_da, edge_ea); + // Here's only one loop so only one edge is expected to be removed. + ASSERT_EQ(newEdges.size(), 8); + } + + // No loops (edge_fa is missing) + { + auto newEdges = + breakCycles(edge_ab, edge_bc, edge_cd, edge_de, edge_ef, edge_ca, edge_da, edge_ea); + // No edges is expected to be removed + ASSERT_EQ(newEdges.size(), 8); + } +} + +TEST_F(GraphCycleBreakerTest, TwoCyclesWithDifferentPredicates) { + // Cycle 1: A.a == B.a and B.a == C.a and C.a == D.a and D.a == A.a + auto edge_ab = addEdge(a, b); + auto edge_bc = addEdge(b, c); + auto edge_cd = addEdge(c, d); + auto edge_da = addEdge(d, a); + + // Cycle 2: D.b == E.b and E.b == F.b and F.b == D.b + auto edge_de = addEdge(d, e, alternativePaths); + auto edge_ef = addEdge(e, f, alternativePaths); + auto edge_fd = addEdge(f, d, alternativePaths); + + // Standalone edge. + // Because of the predicates there it doesn't form a big cycle: A - B - C - D - E - F - A + auto edge_fa = addEdge(f, a, p1, p2); + + validateCycles(/*expectedNumberOfCycles*/ 2); + + auto newEdges = + breakCycles(edge_ab, edge_bc, edge_cd, edge_da, edge_de, edge_ef, edge_fd, edge_fa); + // Two loops + ASSERT_EQ(newEdges.size(), 6); +} + +TEST_F(GraphCycleBreakerTest, TwoCyclesWithASharedEdge) { + // Cycle 1: A.a == B.a and B.a == C.a and C.a == D.a and D.a == A.a + auto edge_ab = addEdge(a, b); + auto edge_bc = addEdge(b, c); + auto edge_cd = addEdge(c, d); + auto edge_da = addEdge(d, a); + + // Cycle 2: A.b == D.b and D.b == E.b and E.b == F.b and F.b == A.b + auto edge_ad = addEdge(a, d, alternativePaths); + auto edge_de = addEdge(d, e, alternativePaths); + auto edge_ef = addEdge(e, f, alternativePaths); + auto edge_fa = addEdge(f, a, alternativePaths); + + validateCycles(/*expectedNumberOfCycles*/ 2); + + // The shared edge + ASSERT_EQ(edge_da, edge_ad); + + auto newEdges = + breakCycles(edge_ab, edge_bc, edge_cd, edge_da, edge_ad, edge_de, edge_ef, edge_fa); + + ASSERT_EQ(newEdges.size(), 6); +} + +TEST_F(GraphCycleBreakerTest, CliqueOf4) { + // Define a big cycle with compatible predicates: + // A.a = B.a and B.a = C.a and C.a = D.a and D.a = E.a and E.a = F.a and C.a = A.a and D.a = A.a + // and D.a = B.a. + // A.a, B.a, C.a, D.a form a clique. + // 7 loops in the clique: + // * 001111: A - B - C - D - A + // * 010011: A - B - C - A + // * 011100: A - C - D - A + // * 100110: B - C - D - B + // * 101001: A - B - D - A + // * 110101: A - B - D - C - A + // * 111010: A - D - B - C - A + auto edge_ab = addEdge(a, b); + auto edge_bc = addEdge(b, c); + auto edge_cd = addEdge(c, d); + auto edge_da = addEdge(d, a); + auto edge_ca = addEdge(c, a); + auto edge_db = addEdge(d, b); + auto edge_de = addEdge(d, e); + auto edge_ef = addEdge(e, f); + + validateCycles(/*expectedNumberOfCycles*/ cyclesInClique(4)); + + auto newEdges = + breakCycles(edge_ab, edge_bc, edge_cd, edge_de, edge_ef, edge_ca, edge_da, edge_db); + ASSERT_EQ(newEdges.size(), 5); +} + +TEST_F(GraphCycleBreakerTest, TwoCliquesOf4) { + addEdge(a, b); + addEdge(b, c); + addEdge(c, d); + addEdge(d, a); + addEdge(c, a); + addEdge(d, b); + addEdge(d, e); + addEdge(e, f); + + addEdge(a, b, alternativePaths); + addEdge(b, c, alternativePaths); + addEdge(c, d, alternativePaths); + addEdge(d, a, alternativePaths); + addEdge(c, a, alternativePaths); + addEdge(d, b, alternativePaths); + addEdge(d, e, alternativePaths); + addEdge(e, f, alternativePaths); + + validateCycles(/*expectedNumberOfCycles*/ 2 * cyclesInClique(4)); +} + +TEST_F(GraphCycleBreakerTest, CliqueOf6) { + std::vector nodes{a, b, c, d, e, f}; + + for (auto left : nodes) { + for (auto right : nodes) { + if (left < right) { + addEdge(left, right); + } + } + } + + validateCycles(/*expectedNumberOfCycles*/ cyclesInClique(6)); +} + + // *************************************************** // findCycles tests diff --git a/src/mongo/db/query/compiler/optimizer/join/join_graph.h b/src/mongo/db/query/compiler/optimizer/join/join_graph.h index ff69101578b..6fcc466257c 100644 --- a/src/mongo/db/query/compiler/optimizer/join/join_graph.h +++ b/src/mongo/db/query/compiler/optimizer/join/join_graph.h @@ -47,6 +47,10 @@ constexpr size_t kMaxNodesInJoin = 64; */ constexpr size_t kMaxEdgesInJoin = std::numeric_limits::max(); +/** The maximum number of predicates in a Join Graph. + */ +constexpr size_t kMaxNumberOfPredicates = std::numeric_limits::max(); + /** NodeSet is a bitset representation of a subset of join nodes. It is used to efficiently * track which nodes are included in an intermediate join. This compact representation is highly * effective for the join reordering algorithm. @@ -227,8 +231,8 @@ public: return _nodes.size(); } - size_t numEdges() const { - return _edges.size(); + EdgeId numEdges() const { + return static_cast(_edges.size()); } /** Serializes the Join Graph to BSON. diff --git a/src/mongo/db/query/compiler/optimizer/join/plan_enumerator.cpp b/src/mongo/db/query/compiler/optimizer/join/plan_enumerator.cpp index daf3b7542bb..a10a2eec148 100644 --- a/src/mongo/db/query/compiler/optimizer/join/plan_enumerator.cpp +++ b/src/mongo/db/query/compiler/optimizer/join/plan_enumerator.cpp @@ -148,7 +148,6 @@ void PlanEnumeratorContext::addJoinPlan(PlanTreeShape type, } if (method == JoinMethod::INLJ) { - // TODO SERVER-115093: Change this to an equality tassert once we break cycles. tassert(11371701, "Expected at least one edge", edges.size() >= 1); auto edge = edges[0]; auto ie = bestIndexSatisfyingJoinPredicates(