SERVER-115093 Reimplement join graph cycle breaker (#45054)

GitOrigin-RevId: 4daf17acd855213f09bec5e5f5bfa7c875189cd7
This commit is contained in:
Alexander Ignatyev 2025-12-15 20:19:45 +00:00 committed by MongoDB Bot
parent 6b3f4e62d6
commit b506d1fe7c
9 changed files with 470 additions and 340 deletions

View File

@ -20,7 +20,6 @@ exports_files(
mongo_cc_library(
name = "join_ordering",
srcs = [
"adjacency_matrix.cpp",
"graph_cycle_breaker.cpp",
"join_graph.cpp",
"path_resolver.cpp",
@ -28,6 +27,7 @@ mongo_cc_library(
deps = [
"//src/mongo/db:server_base",
"//src/mongo/db/query:canonical_query_base",
"//src/mongo/db/query/compiler/optimizer/cost_based_ranker:estimates",
"//src/mongo/db/query/util:query_util_bitset",
"//src/mongo/db/query/util:query_util_disjoint_set",
],
@ -59,7 +59,6 @@ mongo_cc_library(
mongo_cc_unit_test(
name = "join_ordering_test",
srcs = [
"adjacency_matrix_test.cpp",
"agg_join_model_test.cpp",
"cardinality_estimator_join_test.cpp",
"graph_cycle_breaker_test.cpp",

View File

@ -1,46 +0,0 @@
/**
* Copyright (C) 2025-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/db/query/compiler/optimizer/join/adjacency_matrix.h"
#include "mongo/db/query/util/bitset_util.h"
namespace mongo::join_ordering {
AdjacencyMatrix makeAdjacencyMatrix(const JoinGraph& joinGraph,
const std::vector<EdgeId>& subgraph) {
AdjacencyMatrix matrix;
for (auto edgeId : subgraph) {
const auto& edge = joinGraph.getEdge(edgeId);
matrix[edge.left].set(edge.right);
matrix[edge.right].set(edge.left);
}
return matrix;
}
} // namespace mongo::join_ordering

View File

@ -1,49 +0,0 @@
/**
* Copyright (C) 2025-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#pragma once
#include "mongo/db/query/compiler/optimizer/join/join_graph.h"
#include "mongo/util/modules.h"
namespace mongo::join_ordering {
/**
* Adjacency matrix for a join subgraph. it determines connectivity between some nodes 'u' and 'v':
* 'adjacencyMatrix[u][v]' returns 'true' if the nodes are directly connected.
* Since the join graph is undirected, the matrix is always symmetrical:
* 'adjacencyMatrix[u][v] == adjacencyMatrix[v][u]'.
*/
using AdjacencyMatrix = std::array<NodeSet, kMaxNodesInJoin>;
/**
* Builds an AdjacencyMatrix for a specified 'subgraph' within the 'joinGraph'.
*/
AdjacencyMatrix makeAdjacencyMatrix(const JoinGraph& joinGraph,
const std::vector<EdgeId>& subgraph);
} // namespace mongo::join_ordering

View File

@ -1,145 +0,0 @@
/**
* Copyright (C) 2025-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
* as published by MongoDB, Inc.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* Server Side Public License for more details.
*
* You should have received a copy of the Server Side Public License
* along with this program. If not, see
* <http://www.mongodb.com/licensing/server-side-public-license>.
*
* As a special exception, the copyright holders give permission to link the
* code of portions of this program with the OpenSSL library under certain
* conditions as described in each individual source file and distribute
* linked combinations including the program with the OpenSSL library. You
* must comply with the Server Side Public License in all respects for
* all of the code used other than as permitted herein. If you modify file(s)
* with this exception, you may extend this exception to your version of the
* file(s), but you are not obligated to do so. If you do not wish to do so,
* delete this exception statement from your version. If you delete this
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
#include "mongo/db/query/compiler/optimizer/join/adjacency_matrix.h"
#include "mongo/db/query/compiler/optimizer/join/unit_test_helpers.h"
#include "mongo/unittest/unittest.h"
namespace mongo::join_ordering {
namespace {
AdjacencyMatrix makeMatrix(std::vector<std::string_view> rows) {
AdjacencyMatrix matrix;
for (size_t i = 0; i < rows.size(); ++i) {
matrix[i] = NodeSet{rows[i].data(), rows[i].size()};
}
return matrix;
}
NodeId addNode(JoinGraph& graph) {
return *graph.addNode(makeNSS("test"), nullptr, boost::none);
}
EdgeId addEdge(JoinGraph& graph, NodeId left, NodeId right) {
return *graph.addSimpleEqualityEdge(left, right, 0, 0);
}
} // namespace
TEST(AdjacencyMatrixBuilderTest, Smoke) {
JoinGraph graph;
auto node0 = addNode(graph);
auto node1 = addNode(graph);
auto node2 = addNode(graph);
auto node3 = addNode(graph);
auto node4 = addNode(graph);
auto edge01 = addEdge(graph, node0, node1);
auto edge12 = addEdge(graph, node1, node2);
auto edge23 = addEdge(graph, node2, node3);
auto edge34 = addEdge(graph, node3, node4);
auto edge40 = addEdge(graph, node4, node0);
// No edges case
{
auto expectedMatrix = makeMatrix({
"00000",
"00000",
"00000",
"00000",
"00000",
});
ASSERT_EQ(makeAdjacencyMatrix(graph, {}), expectedMatrix);
}
// All edges case
{
auto expectedMatrix = makeMatrix({
"10010",
"00101",
"01010",
"10100",
"01001",
});
ASSERT_EQ(makeAdjacencyMatrix(graph, {edge01, edge12, edge23, edge34, edge40}),
expectedMatrix);
}
// 2 edges case
{
auto expectedMatrix = makeMatrix({
"00010",
"00101",
"00010",
"00000",
"00000",
});
ASSERT_EQ(makeAdjacencyMatrix(graph, {edge01, edge12}), expectedMatrix);
}
// 1 edge case
{
auto expectedMatrix = makeMatrix({
"00000",
"00100",
"00010",
"00000",
"00000",
});
ASSERT_EQ(makeAdjacencyMatrix(graph, {edge12}), expectedMatrix);
}
}
TEST(AdjacencyMatrixBuilderTest, FullyConnectedGraph) {
JoinGraph graph;
auto node0 = addNode(graph);
auto node1 = addNode(graph);
auto node2 = addNode(graph);
auto node3 = addNode(graph);
auto edge01 = addEdge(graph, node0, node1);
auto edge02 = addEdge(graph, node0, node2);
auto edge03 = addEdge(graph, node0, node3);
auto edge12 = addEdge(graph, node1, node2);
auto edge13 = addEdge(graph, node1, node3);
auto edge23 = addEdge(graph, node2, node3);
auto expectedMatrix = makeMatrix({
"1110",
"1101",
"1011",
"0111",
});
ASSERT_EQ(makeAdjacencyMatrix(graph, {edge01, edge02, edge03, edge12, edge13, edge23}),
expectedMatrix);
}
} // namespace mongo::join_ordering

View File

@ -29,12 +29,42 @@
#include "mongo/db/query/compiler/optimizer/join/graph_cycle_breaker.h"
#include "mongo/db/query/util/bitset_util.h"
#include <algorithm>
#include <iterator>
#include <absl/container/flat_hash_set.h>
namespace mongo::join_ordering {
namespace {
/**
* Creates a graph with predicates as edges and fields as nodes from the given 'joinGraph'. The
* parameter 'numNodes' representas the number of nodes in the new graph. It is the duty of the
* caller to make sure that 'numNodes' is big enough.
*/
AdjacencyList makeAdjacencyList(const JoinGraph& joinGraph, size_t numNodes) {
AdjacencyList adjList;
adjList.neighbors.resize(numNodes);
for (EdgeId edgeId = 0; edgeId != joinGraph.numEdges(); ++edgeId) {
const auto& edge = joinGraph.getEdge(edgeId);
for (uint16_t predIndex = 0; predIndex != static_cast<uint16_t>(edge.predicates.size());
++predIndex) {
const auto& predicate = edge.predicates[predIndex];
const PredicateId predicateId = static_cast<PredicateId>(adjList.predicates.size());
adjList.neighbors[predicate.left].emplace_back(predicate.right, predicateId);
adjList.neighbors[predicate.right].emplace_back(predicate.left, predicateId);
adjList.predicates.emplace_back(edgeId, predIndex);
}
}
tassert(11509300,
"Too many predicates in thejoin graph",
adjList.predicates.size() < kMaxNumberOfPredicates);
return adjList;
}
/**
* Backtracking with prunings for finding cycles in an undirected graph.
*/
@ -49,10 +79,6 @@ public:
// Needed to handle duplicates.
absl::flat_hash_set<Bitset> cycles;
// 'cleared*' bitsets are used to clear their correponding bitsets.
const Bitset clearedBlocked{_adjList.neighbors.size()};
const Bitset clearedEdges{_adjList.predicates.size()};
// This loop tries to find cycles starting and ending with every node.
for (size_t start = 0; start != _adjList.neighbors.size(); ++start) {
if (_adjList.neighbors[start].size() == 0) {
@ -60,8 +86,9 @@ public:
}
// Reset the state to prepare a new search.
_blocked &= clearedBlocked;
_edges &= clearedEdges;
_blocked.clear();
_edges.clear();
;
findCircuits(static_cast<PathId>(start), static_cast<PathId>(start), cycles);
}
@ -82,7 +109,7 @@ private:
// Consider only nodes which >= start node, since cycles involving nodes < start were
// discovered in earlier call of the function. We also don't want to backtrack already
// tracked edges.
if (v < start || _edges[predId]) {
if (v < start || _edges.test(predId)) {
continue;
}
@ -91,7 +118,7 @@ private:
// Found a cycle.
cycles.insert(_edges);
isCycleFound = true;
} else if (!_blocked[v]) {
} else if (!_blocked.test(v)) {
if (findCircuits(start, v, cycles)) {
isCycleFound = true;
}
@ -108,6 +135,31 @@ private:
// A node is blocked if it's currently being explored.
Bitset _blocked;
};
/**
* Returns the largest path seen in the graph + 1.
*/
size_t getNumberOfPaths(const JoinGraph& graph) {
boost::optional<PathId> maxPath;
for (const auto& edge : graph.edges()) {
for (const auto& pred : edge.predicates) {
maxPath = std::max({maxPath.value_or(0), pred.left, pred.right});
}
}
return maxPath.has_value() ? *maxPath + 1 : 0;
}
/**
* Returns a bitset, where each bit corresponds to an edge, a bit is set if its correspondong
* edgehas non-compound predicates (< 2).
*/
Bitset getEdgesWithSimplePredicates(const JoinGraph& graph) {
Bitset edgesWithSimplePredicates(graph.numEdges());
for (EdgeId edgeId = 0; edgeId != graph.numEdges(); ++edgeId) {
edgesWithSimplePredicates.set(edgeId, graph.getEdge(edgeId).predicates.size() < 2);
}
return edgesWithSimplePredicates;
}
} // namespace
JoinGraphCycles findCycles(AdjacencyList adjList) {
@ -118,75 +170,94 @@ JoinGraphCycles findCycles(AdjacencyList adjList) {
.predicates = std::move(adjList.predicates)};
}
std::vector<EdgeId> GraphCycleBreaker::breakCycles(std::vector<EdgeId> subgraph) {
// Performs a cycle detection in undirected graph using DFS. Once a cycle is detected the
// last edge detected edge of the cycle is removed to break the cycle.
//
// clean up
_seen.reset();
_edgesToRemove.clear();
std::fill(begin(_parents), end(_parents), _sentinel);
GraphCycleBreaker::GraphCycleBreaker(const JoinGraph& graph,
const EdgeSelectivities& edgeSelectivities,
size_t numPaths)
: _numEdges(graph.numEdges()) {
tassert(11509301,
"Edges selectivites are not provided for all edges",
edgeSelectivities.size() >= _numEdges);
// Calculate adjacency matrix for the subgraph.
const auto adjMatrix = makeAdjacencyMatrix(_graph, subgraph);
// Traverse the graph to find out cycles and add edges to delete in _edgesToRemove.
for (NodeId nodeId = 0; nodeId != _sentinel; ++nodeId) {
if (!_seen[nodeId]) {
visit(nodeId, adjMatrix, subgraph);
}
if (numPaths == 0) {
numPaths = getNumberOfPaths(graph);
} else if constexpr (kDebugBuild) {
tassert(
11509302, "Incorrect number of paths provided", getNumberOfPaths(graph) <= numPaths);
}
// Remove edges to break the identified cycles.
auto end = std::remove_if(subgraph.begin(), subgraph.end(), [&](int edgeId) {
return _edgesToRemove.contains(edgeId);
// 1. Create a graph based on predicates as edges and fields (PathId) as nodes.
auto adjList = makeAdjacencyList(graph, numPaths);
// 2. Finds all the cycles in the graph.
auto cycles = findCycles(std::move(adjList));
// 3. Find edges which we would prefer to delete in order to break the cycles and store them
// together with the cycles.
const auto edgesWithSimplePredicates = getEdgesWithSimplePredicates(graph);
_cycles.reserve(cycles.cycles.size());
std::transform(
cycles.cycles.begin(),
cycles.cycles.end(),
std::back_inserter(_cycles),
[this, &cycles, &edgeSelectivities, edgesWithSimplePredicates](const Bitset& cycle) {
auto edges = getEdgeBitset(cycle, cycles.predicates);
auto edge = breakCycle(edges, edgeSelectivities, edgesWithSimplePredicates);
return CycleInfo{.edges = std::move(edges), .edge = edge};
});
// 4. Sorting the cycles to make the cycle breaker consistent.
std::sort(_cycles.begin(), _cycles.end());
}
std::vector<EdgeId> GraphCycleBreaker::breakCycles(std::vector<EdgeId> subgraph) {
Bitset edgesBitset(_numEdges);
for (auto edgeId : subgraph) {
edgesBitset.set(edgeId);
}
for (const auto& cycle : _cycles) {
// The subgraph contains the current cycle if all the cycle edges are subset of the
// subgraph excluding already deleted nodes.
if (cycle.edges.isSubsetOf(edgesBitset)) {
edgesBitset.set(cycle.edge, false);
}
}
auto end = std::remove_if(
subgraph.begin(), subgraph.end(), [&](int edgeId) { return !edgesBitset.test(edgeId); });
subgraph.erase(end, subgraph.end());
return subgraph;
}
void GraphCycleBreaker::visit(NodeId nodeId,
const AdjacencyMatrix& matrix,
const std::vector<EdgeId>& edges) {
_seen.set(nodeId);
for (auto otherNodeId : iterable(matrix[nodeId], _graph.numNodes())) {
if (!_seen[otherNodeId]) {
_parents[otherNodeId] = nodeId;
visit(otherNodeId, matrix, edges);
} else if (_parents[nodeId] != otherNodeId) { // check that we don't traverse the edge back
breakCycle(/*currentNode*/ otherNodeId, /*previousNode*/ nodeId, edges);
EdgeId GraphCycleBreaker::breakCycle(const Bitset& cycleBitset,
const EdgeSelectivities& edgeSelectivities,
const Bitset& edgesWithSimplePredicates) const {
// 1. Break a cycle of 3 by removing the middle edge sorted by selectivity.
if (cycleBitset.count() == 3) {
absl::InlinedVector<std::pair<cost_based_ranker::SelectivityEstimate, EdgeId>, 3>
selectivities;
for (auto edgeId : makePopulationView(cycleBitset)) {
selectivities.emplace_back(edgeSelectivities.at(edgeId), static_cast<EdgeId>(edgeId));
}
std::sort(selectivities.begin(), selectivities.end());
return selectivities[1].second;
}
// 2. Or break a cycle by selecting a non-compound edge.
if (cycleBitset.intersects(edgesWithSimplePredicates)) {
return (cycleBitset & edgesWithSimplePredicates).findFirst();
}
// 3. Or break a cycle by selecting any edge.
return cycleBitset.findFirst();
}
void GraphCycleBreaker::breakCycle(NodeId currentNode,
NodeId previousNode,
const std::vector<EdgeId>& edges) {
// In this naive implementation of breaking cycles we just remove the last discovered node. This
// is the simpliest solution and doesn't break '_parents'. However, if we delete a node from the
// middle of the cycle we should:
// * correct the '_parents', so that if another cycle is discovered we can use '_parents' to
// iterate over the cycle;
// * call the 'visit' with the 'currentNode'.
//
// TODO SERVER-114121: We assume here's just one edge which connects these two nodes,
// which is true now. When we start supporting edges which have multiple nodes on one
// side the things will become more complicated.
auto edgeId = findEdgeId(previousNode, currentNode, edges);
tassert(11116500, "The graph edge is expected to exist", edgeId);
_edgesToRemove.insert(*edgeId);
}
boost::optional<EdgeId> GraphCycleBreaker::findEdgeId(NodeId u,
NodeId v,
const std::vector<EdgeId>& edges) const {
for (auto edgeId : edges) {
const auto& edge = _graph.getEdge(edgeId);
if ((edge.left == u && edge.right == v) || (edge.left == v && edge.right == u)) {
return edgeId;
Bitset GraphCycleBreaker::getEdgeBitset(
const Bitset& predicateBitset,
const std::vector<std::pair<EdgeId, uint16_t>>& predicates) const {
Bitset bitset{_numEdges};
for (auto index : makePopulationView(predicateBitset)) {
bitset.set(predicates[index].first);
}
}
return boost::none;
return bitset;
}
} // namespace mongo::join_ordering

View File

@ -29,7 +29,7 @@
#pragma once
#include "mongo/db/query/compiler/optimizer/join/adjacency_matrix.h"
#include "mongo/db/query/compiler/optimizer/join/cardinality_estimator.h"
#include "mongo/db/query/compiler/optimizer/join/join_graph.h"
#include "mongo/util/dynamic_bitset.h"
#include "mongo/util/modules.h"
@ -75,8 +75,29 @@ JoinGraphCycles findCycles(AdjacencyList adjList);
* subgraph to break its cycles.
*/
class GraphCycleBreaker {
struct CycleInfo {
/**
* Cycle stored as a bitset of edges forming the cycle.
*/
Bitset edges;
/**
* Edge to delete to break the cycle .
*/
EdgeId edge;
friend auto operator<=>(const CycleInfo&, const CycleInfo&) = default;
};
public:
explicit GraphCycleBreaker(const JoinGraph& graph) : _graph(graph) {}
/**
* Initializes new instance of GraphCycleBreaker for the given 'graph'.
* 'edgeSelectivities' is expected to have selectivities for all edges of the graph,
* 'numPaths' is expected to be the number of resolved paths.
*/
explicit GraphCycleBreaker(const JoinGraph& graph,
const EdgeSelectivities& edgeSelectivities,
size_t numPaths = 0);
/**
* Break cycles for the subgraph specified by the list of edges. It returns new subgraph without
@ -84,26 +105,18 @@ public:
*/
std::vector<EdgeId> breakCycles(std::vector<EdgeId> subgraph);
size_t numCycles_forTest() const {
return _cycles.size();
}
private:
/**
* Implements DFS algorithm to detect graph cycles.
*/
void visit(NodeId nodeId, const AdjacencyMatrix& matrix, const std::vector<EdgeId>& edges);
EdgeId breakCycle(const Bitset& cycleBitset,
const EdgeSelectivities& edgeSelectivities,
const Bitset& edgesWithSimplePredicates) const;
Bitset getEdgeBitset(const Bitset& predicateBitset,
const std::vector<std::pair<EdgeId, uint16_t>>& predicates) const;
/**
* Breaks a cycle which is defined by the currentNode and its parent previousNode.
*/
void breakCycle(NodeId currentNode, NodeId previousNode, const std::vector<EdgeId>& edges);
/**
* Find EdgeId that connects two specified nodes u and v.
*/
boost::optional<EdgeId> findEdgeId(NodeId u, NodeId v, const std::vector<EdgeId>& edges) const;
const JoinGraph& _graph;
const NodeId _sentinel{kMaxNodesInJoin};
NodeSet _seen;
std::array<NodeId, kMaxNodesInJoin> _parents;
absl::btree_set<EdgeId> _edgesToRemove;
size_t _numEdges;
std::vector<CycleInfo> _cycles;
};
} // namespace mongo::join_ordering

View File

@ -27,7 +27,6 @@
* it in the license file.
*/
#include "mongo/db/query/compiler/optimizer/join/graph_cycle_breaker.h"
#include "mongo/db/query/compiler/optimizer/join/unit_test_helpers.h"
@ -66,13 +65,25 @@ constexpr size_t cyclesInClique(size_t numNodes) {
class GraphCycleBreakerTest : public unittest::Test {
public:
GraphCycleBreakerTest() : graph{}, breaker(graph) {
static constexpr EdgeId kMaxEdgeId = 63;
using PathMap = absl::flat_hash_map<NodeId, PathId>;
GraphCycleBreakerTest() : graph{} {
a = addNode("a");
b = addNode("b");
c = addNode("c");
d = addNode("d");
e = addNode("e");
f = addNode("f");
defaultPaths = {{a, pa}, {b, pb}, {c, pc}, {d, pd}, {e, pe}, {f, pf}};
alternativePaths = {{a, pa1}, {b, pb1}, {c, pc1}, {d, pd1}, {e, pe1}, {f, pf1}};
for (size_t i = 0; i <= kMaxEdgeId; ++i) {
edgeSelectivities.emplace_back(
cost_based_ranker::SelectivityType{static_cast<double>(i + 1) / (2.0 * kMaxEdgeId)},
cost_based_ranker::EstimationSource::Code);
}
}
NodeId addNode(StringData collName) {
@ -80,29 +91,144 @@ public:
}
EdgeId addEdge(NodeId u, NodeId v) {
return *graph.addEdge(u, v, {});
return addEdge(u, v, defaultPaths);
}
EdgeId addEdge(NodeId u, NodeId v, PathMap pathMap) {
return *graph.addSimpleEqualityEdge(u, v, pathMap[u], pathMap[v]);
}
EdgeId addEdge(NodeId u, NodeId v, PathId uPath, PathId vPath) {
return *graph.addSimpleEqualityEdge(u, v, uPath, vPath);
}
EdgeId addCompoundEdge(NodeId u, NodeId v) {
addEdge(u, v, defaultPaths);
return addEdge(u, v, alternativePaths);
}
void validateCycles(size_t expectedNumberOfCycles) {
ASSERT_LTE(graph.numEdges(), kMaxEdgeId + 1);
GraphCycleBreaker breaker{graph, edgeSelectivities};
ASSERT_EQ(breaker.numCycles_forTest(), expectedNumberOfCycles);
}
template <typename... EdgeIds>
std::vector<EdgeId> breakCycles(EdgeIds... edges) {
return breaker.breakCycles({edges...});
ASSERT_LTE(graph.numEdges(), kMaxEdgeId + 1);
GraphCycleBreaker breaker{graph, edgeSelectivities};
auto newEdges = breaker.breakCycles({edges...});
return newEdges;
}
void setSelectivity(EdgeId edgeId, double newSelectivity) {
edgeSelectivities[edgeId] = cost_based_ranker::SelectivityEstimate(
cost_based_ranker::SelectivityType(newSelectivity),
cost_based_ranker::EstimationSource::Code);
}
static constexpr PathId p1{1}, p2{2}, pa{11}, pb{12}, pc{13}, pd{14}, pe{15}, pf{16}, pa1{21},
pb1{22}, pc1{23}, pd1{24}, pe1{25}, pf1{26}, pEnd{27};
JoinGraph graph;
GraphCycleBreaker breaker;
NodeId a, b, c, d, e, f;
absl::flat_hash_map<NodeId, PathId> defaultPaths;
absl::flat_hash_map<NodeId, PathId> alternativePaths;
EdgeSelectivities edgeSelectivities;
};
TEST_F(GraphCycleBreakerTest, GraphWithCycle) {
/**
* Tests that a middle by selectivity edge is always selected to break cycles.
*/
TEST_F(GraphCycleBreakerTest, MiddleEdge) {
auto edge_ab = addCompoundEdge(a, b);
auto edge_bc = addEdge(b, c);
auto edge_ca = addEdge(c, a);
// B-C is a middle by selectivity.
setSelectivity(edge_ab, 0.003);
setSelectivity(edge_bc, 0.005);
setSelectivity(edge_ca, 0.007);
// Case1: B-C is the middle. Selectivities: 0.003, 0.005, 0.007.
std::vector<EdgeId> expectedEdges{edge_ab, edge_ca};
auto newEdges = breakCycles(edge_ab, edge_bc, edge_ca);
std::sort(newEdges.begin(), newEdges.end());
ASSERT_EQ(newEdges, expectedEdges);
// Case 2: C-A is the middle. Selectivities: 0.003, 0.009, 0.007.
// Increase B-C selectivity which makes C-A a new middle.
setSelectivity(edge_bc, 0.009);
expectedEdges = {edge_ab, edge_bc};
newEdges = breakCycles(edge_ab, edge_bc, edge_ca);
std::sort(newEdges.begin(), newEdges.end());
ASSERT_EQ(newEdges, expectedEdges);
// Case 3: Compound edge A-B removal. Selectivities: 0.008, 0.009, 0.007.
// Increase A-B selectivity which makes A-B a new middle.
setSelectivity(edge_ab, 0.008);
expectedEdges = {edge_bc, edge_ca};
newEdges = breakCycles(edge_ab, edge_bc, edge_ca);
std::sort(newEdges.begin(), newEdges.end());
ASSERT_EQ(newEdges, expectedEdges);
}
/**
* Tests that a non-compund edge is selected if available to break a cycle of size > 3.
*/
TEST_F(GraphCycleBreakerTest, NoCompoundEdgeSelection) {
auto edge_ab = addCompoundEdge(a, b);
auto edge_bc = addCompoundEdge(b, c);
auto edge_cd = addEdge(c, d);
auto edge_da = addCompoundEdge(d, a);
// We expected that the only non-compound edge C-D will be removed to break the 4-cycle.
std::vector<EdgeId> expectedEdges{edge_ab, edge_bc, edge_da};
auto newEdges = breakCycles(edge_ab, edge_bc, edge_cd, edge_da);
std::sort(newEdges.begin(), newEdges.end());
ASSERT_EQ(newEdges, expectedEdges);
}
/**
* The cycles still breaks even if all edges are compound.
*/
TEST_F(GraphCycleBreakerTest, AllEdgesAreCompound) {
auto edge_ab = addCompoundEdge(a, b);
auto edge_bc = addCompoundEdge(b, c);
auto edge_cd = addCompoundEdge(c, d);
auto edge_da = addCompoundEdge(d, a);
auto newEdges = breakCycles(edge_ab, edge_bc, edge_cd, edge_da);
// One edge was removed to break the cycle.
ASSERT_EQ(newEdges.size(), 3);
}
TEST_F(GraphCycleBreakerTest, GraphWithComplexCycles) {
// A.a = B.a and B.a = C.a and C.a = D.a and D.a = A.a and C.a = A.a
// Three cycles:
// * A.a - B.a - C.a - D.a - A.a
// * A.a - B.a - C.a - A.a
// * A.a - C.a - D.a - A.a
auto edge_ab = addEdge(a, b);
auto edge_bc = addEdge(b, c);
auto edge_cd = addEdge(c, d);
auto edge_da = addEdge(d, a);
auto edge_ca = addEdge(c, a);
// This edge is ignored due to its incompatible predicates.
auto edge_db = addEdge(d, b, alternativePaths);
auto newEdges = breakCycles(edge_ab, edge_bc, edge_cd, edge_da, edge_ca);
validateCycles(/*expectedNumberOfCycles*/ 3);
auto newEdges = breakCycles(edge_ab, edge_bc, edge_cd, edge_da, edge_ca, edge_db);
// Two edges are expected to be removed.
ASSERT_EQ(newEdges.size(), 3);
ASSERT_EQ(newEdges.size(), 4);
newEdges = breakCycles(edge_ab, edge_bc, edge_ca);
// Subgraph case.
@ -110,40 +236,198 @@ TEST_F(GraphCycleBreakerTest, GraphWithCycle) {
}
TEST_F(GraphCycleBreakerTest, NoCycleGraph) {
// A.a = B.a and B.a = C.a
auto edge_ab = addEdge(a, b);
auto edge_bc = addEdge(b, c);
auto edge_cd = addEdge(c, d);
validateCycles(/*expectedNumberOfCycles*/ 0);
auto newEdges = breakCycles(edge_ab, edge_bc, edge_cd);
// No edges are expected to be removed.
ASSERT_EQ(newEdges.size(), 3);
}
TEST_F(GraphCycleBreakerTest, DisconnectedGraph) {
// A.a = B.a and C.a = D.a
auto edge_ab = addEdge(a, b);
auto edge_cd = addEdge(c, d);
validateCycles(/*expectedNumberOfCycles*/ 0);
auto newEdges = breakCycles(edge_ab, edge_cd);
// No edges are expected to be removed.
ASSERT_EQ(newEdges.size(), 2);
}
TEST_F(GraphCycleBreakerTest, DisconnectedGraphWithCycles) {
// A - B - C - A
// A.a - B.a - C.a - A.a
auto edge_ab = addEdge(a, b);
auto edge_bc = addEdge(b, c);
auto edge_ca = addEdge(c, a);
// D - E - F - D
// D.a - E.a - F.a - D.a
auto edge_de = addEdge(d, e);
auto edge_ef = addEdge(e, f);
auto edge_fd = addEdge(f, d);
validateCycles(/*expectedNumberOfCycles*/ 2);
auto newEdges = breakCycles(edge_ab, edge_bc, edge_ca, edge_de, edge_ef, edge_fd);
// Two edges are expected to be removed.
ASSERT_EQ(newEdges.size(), 4);
}
TEST_F(GraphCycleBreakerTest, IncompatiblePredicates) {
// Define a big cycle with compatible predicates:
// A.a = B.a and B.a = C.a and C.a = D.a and D.a = E.a and E.a = F.a and F.a = A.a
auto edge_ab = addEdge(a, b);
auto edge_bc = addEdge(b, c);
auto edge_cd = addEdge(c, d);
auto edge_de = addEdge(d, e);
auto edge_ef = addEdge(e, f);
auto edge_fa = addEdge(f, a);
// Add edges with incompatible predicates. These edges would add additional cycles in the graph
// if not their predicates:
// C.b = A.b and D.b = A.b and E.b = A.b
auto edge_ca = addEdge(c, a, alternativePaths);
auto edge_da = addEdge(d, a, alternativePaths);
auto edge_ea = addEdge(e, a, alternativePaths);
validateCycles(/*expectedNumberOfCycles*/ 1);
// One big loop case
{
auto newEdges = breakCycles(
edge_ab, edge_bc, edge_cd, edge_de, edge_ef, edge_fa, edge_ca, edge_da, edge_ea);
// Here's only one loop so only one edge is expected to be removed.
ASSERT_EQ(newEdges.size(), 8);
}
// No loops (edge_fa is missing)
{
auto newEdges =
breakCycles(edge_ab, edge_bc, edge_cd, edge_de, edge_ef, edge_ca, edge_da, edge_ea);
// No edges is expected to be removed
ASSERT_EQ(newEdges.size(), 8);
}
}
TEST_F(GraphCycleBreakerTest, TwoCyclesWithDifferentPredicates) {
// Cycle 1: A.a == B.a and B.a == C.a and C.a == D.a and D.a == A.a
auto edge_ab = addEdge(a, b);
auto edge_bc = addEdge(b, c);
auto edge_cd = addEdge(c, d);
auto edge_da = addEdge(d, a);
// Cycle 2: D.b == E.b and E.b == F.b and F.b == D.b
auto edge_de = addEdge(d, e, alternativePaths);
auto edge_ef = addEdge(e, f, alternativePaths);
auto edge_fd = addEdge(f, d, alternativePaths);
// Standalone edge.
// Because of the predicates there it doesn't form a big cycle: A - B - C - D - E - F - A
auto edge_fa = addEdge(f, a, p1, p2);
validateCycles(/*expectedNumberOfCycles*/ 2);
auto newEdges =
breakCycles(edge_ab, edge_bc, edge_cd, edge_da, edge_de, edge_ef, edge_fd, edge_fa);
// Two loops
ASSERT_EQ(newEdges.size(), 6);
}
TEST_F(GraphCycleBreakerTest, TwoCyclesWithASharedEdge) {
// Cycle 1: A.a == B.a and B.a == C.a and C.a == D.a and D.a == A.a
auto edge_ab = addEdge(a, b);
auto edge_bc = addEdge(b, c);
auto edge_cd = addEdge(c, d);
auto edge_da = addEdge(d, a);
// Cycle 2: A.b == D.b and D.b == E.b and E.b == F.b and F.b == A.b
auto edge_ad = addEdge(a, d, alternativePaths);
auto edge_de = addEdge(d, e, alternativePaths);
auto edge_ef = addEdge(e, f, alternativePaths);
auto edge_fa = addEdge(f, a, alternativePaths);
validateCycles(/*expectedNumberOfCycles*/ 2);
// The shared edge
ASSERT_EQ(edge_da, edge_ad);
auto newEdges =
breakCycles(edge_ab, edge_bc, edge_cd, edge_da, edge_ad, edge_de, edge_ef, edge_fa);
ASSERT_EQ(newEdges.size(), 6);
}
TEST_F(GraphCycleBreakerTest, CliqueOf4) {
// Define a big cycle with compatible predicates:
// A.a = B.a and B.a = C.a and C.a = D.a and D.a = E.a and E.a = F.a and C.a = A.a and D.a = A.a
// and D.a = B.a.
// A.a, B.a, C.a, D.a form a clique.
// 7 loops in the clique:
// * 001111: A - B - C - D - A
// * 010011: A - B - C - A
// * 011100: A - C - D - A
// * 100110: B - C - D - B
// * 101001: A - B - D - A
// * 110101: A - B - D - C - A
// * 111010: A - D - B - C - A
auto edge_ab = addEdge(a, b);
auto edge_bc = addEdge(b, c);
auto edge_cd = addEdge(c, d);
auto edge_da = addEdge(d, a);
auto edge_ca = addEdge(c, a);
auto edge_db = addEdge(d, b);
auto edge_de = addEdge(d, e);
auto edge_ef = addEdge(e, f);
validateCycles(/*expectedNumberOfCycles*/ cyclesInClique(4));
auto newEdges =
breakCycles(edge_ab, edge_bc, edge_cd, edge_de, edge_ef, edge_ca, edge_da, edge_db);
ASSERT_EQ(newEdges.size(), 5);
}
TEST_F(GraphCycleBreakerTest, TwoCliquesOf4) {
addEdge(a, b);
addEdge(b, c);
addEdge(c, d);
addEdge(d, a);
addEdge(c, a);
addEdge(d, b);
addEdge(d, e);
addEdge(e, f);
addEdge(a, b, alternativePaths);
addEdge(b, c, alternativePaths);
addEdge(c, d, alternativePaths);
addEdge(d, a, alternativePaths);
addEdge(c, a, alternativePaths);
addEdge(d, b, alternativePaths);
addEdge(d, e, alternativePaths);
addEdge(e, f, alternativePaths);
validateCycles(/*expectedNumberOfCycles*/ 2 * cyclesInClique(4));
}
TEST_F(GraphCycleBreakerTest, CliqueOf6) {
std::vector<NodeId> nodes{a, b, c, d, e, f};
for (auto left : nodes) {
for (auto right : nodes) {
if (left < right) {
addEdge(left, right);
}
}
}
validateCycles(/*expectedNumberOfCycles*/ cyclesInClique(6));
}
// ***************************************************
// findCycles tests

View File

@ -47,6 +47,10 @@ constexpr size_t kMaxNodesInJoin = 64;
*/
constexpr size_t kMaxEdgesInJoin = std::numeric_limits<EdgeId>::max();
/** The maximum number of predicates in a Join Graph.
*/
constexpr size_t kMaxNumberOfPredicates = std::numeric_limits<PredicateId>::max();
/** NodeSet is a bitset representation of a subset of join nodes. It is used to efficiently
* track which nodes are included in an intermediate join. This compact representation is highly
* effective for the join reordering algorithm.
@ -227,8 +231,8 @@ public:
return _nodes.size();
}
size_t numEdges() const {
return _edges.size();
EdgeId numEdges() const {
return static_cast<EdgeId>(_edges.size());
}
/** Serializes the Join Graph to BSON.

View File

@ -148,7 +148,6 @@ void PlanEnumeratorContext::addJoinPlan(PlanTreeShape type,
}
if (method == JoinMethod::INLJ) {
// TODO SERVER-115093: Change this to an equality tassert once we break cycles.
tassert(11371701, "Expected at least one edge", edges.size() >= 1);
auto edge = edges[0];
auto ie = bestIndexSatisfyingJoinPredicates(