mirror of https://github.com/mongodb/mongo
SERVER-115093 Implement findCycles in join graph (#45052)
GitOrigin-RevId: 3995bf1a059dad73843d0163f869c5381d51789c
This commit is contained in:
parent
1302e14758
commit
beb2aa10a4
|
|
@ -31,7 +31,93 @@
|
|||
|
||||
#include "mongo/db/query/util/bitset_util.h"
|
||||
|
||||
#include <absl/container/flat_hash_set.h>
|
||||
|
||||
namespace mongo::join_ordering {
|
||||
namespace {
|
||||
/**
|
||||
* Backtracking with prunings for finding cycles in an undirected graph.
|
||||
*/
|
||||
struct GraphCycleFinder {
|
||||
public:
|
||||
explicit GraphCycleFinder(const AdjacencyList& adjList)
|
||||
: _adjList(adjList),
|
||||
_edges(adjList.predicates.size()),
|
||||
_blocked(adjList.neighbors.size()) {}
|
||||
|
||||
absl::flat_hash_set<Bitset> findCycles() {
|
||||
// Needed to handle duplicates.
|
||||
absl::flat_hash_set<Bitset> cycles;
|
||||
|
||||
// 'cleared*' bitsets are used to clear their correponding bitsets.
|
||||
const Bitset clearedBlocked{_adjList.neighbors.size()};
|
||||
const Bitset clearedEdges{_adjList.predicates.size()};
|
||||
|
||||
// This loop tries to find cycles starting and ending with every node.
|
||||
for (size_t start = 0; start != _adjList.neighbors.size(); ++start) {
|
||||
if (_adjList.neighbors[start].size() == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Reset the state to prepare a new search.
|
||||
_blocked &= clearedBlocked;
|
||||
_edges &= clearedEdges;
|
||||
|
||||
findCircuits(static_cast<PathId>(start), static_cast<PathId>(start), cycles);
|
||||
}
|
||||
|
||||
return cycles;
|
||||
}
|
||||
|
||||
private:
|
||||
/**
|
||||
* Depth-first search algorithm that identifies all cycles which starts and end with 'start'
|
||||
* node.
|
||||
*/
|
||||
bool findCircuits(PathId start, PathId u, absl::flat_hash_set<Bitset>& cycles) {
|
||||
bool isCycleFound = false;
|
||||
_blocked.set(u, true);
|
||||
|
||||
for (const auto& [v, predId] : _adjList.neighbors[u]) {
|
||||
// Consider only nodes which >= start node, since cycles involving nodes < start were
|
||||
// discovered in earlier call of the function. We also don't want to backtrack already
|
||||
// tracked edges.
|
||||
if (v < start || _edges[predId]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
_edges.set(predId, true);
|
||||
if (v == start) {
|
||||
// Found a cycle.
|
||||
cycles.insert(_edges);
|
||||
isCycleFound = true;
|
||||
} else if (!_blocked[v]) {
|
||||
if (findCircuits(start, v, cycles)) {
|
||||
isCycleFound = true;
|
||||
}
|
||||
}
|
||||
_edges.set(predId, false);
|
||||
}
|
||||
_blocked.set(u, false);
|
||||
return isCycleFound;
|
||||
}
|
||||
|
||||
const AdjacencyList& _adjList;
|
||||
// Edges seen on the path.
|
||||
Bitset _edges;
|
||||
// A node is blocked if it's currently being explored.
|
||||
Bitset _blocked;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
JoinGraphCycles findCycles(AdjacencyList adjList) {
    // The finder only borrows 'adjList', so the search has to be complete before the predicates
    // are moved out of it below.
    auto uniqueCycles = GraphCycleFinder(adjList).findCycles();

    JoinGraphCycles result;
    // Flatten the de-duplicated set into a vector; the order follows the set's iteration order.
    result.cycles.assign(uniqueCycles.begin(), uniqueCycles.end());
    result.predicates = std::move(adjList.predicates);
    return result;
}
|
||||
|
||||
std::vector<EdgeId> GraphCycleBreaker::breakCycles(std::vector<EdgeId> subgraph) {
|
||||
// Performs cycle detection in an undirected graph using DFS. Once a cycle is detected, the
|
||||
// last detected edge of the cycle is removed to break the cycle.
|
||||
|
|
|
|||
|
|
@ -31,9 +31,45 @@
|
|||
|
||||
#include "mongo/db/query/compiler/optimizer/join/adjacency_matrix.h"
|
||||
#include "mongo/db/query/compiler/optimizer/join/join_graph.h"
|
||||
#include "mongo/util/dynamic_bitset.h"
|
||||
#include "mongo/util/modules.h"
|
||||
|
||||
#include <absl/container/inlined_vector.h>
|
||||
|
||||
namespace mongo::join_ordering {
|
||||
// Bitset type used by the cycle search, e.g. to mark which predicates form a cycle (see
// JoinGraphCycles::cycles).
// NOTE(review): the meaning of the second template argument (1) is not visible here - confirm
// against mongo/util/dynamic_bitset.h.
using Bitset = DynamicBitset<size_t, 1>;
|
||||
|
||||
/**
|
||||
* A graph represented as an adjacency list and used for searching cycles of predicates in Join
|
||||
* Graph. The graph's edges correspond to Join Graph's predicates and the nodes correspond to the
|
||||
* predicate's fields represented as PathIds in Join Graph.
|
||||
*/
|
||||
struct AdjacencyList {
    /**
     * PathId -> {PathId, PredicateId}
     *
     * Indexed by the PathId of a node. Each entry lists the node's neighbors as pairs of the
     * neighboring node's PathId and the PredicateId of the edge which connects the two nodes.
     */
    std::vector<absl::InlinedVector<std::pair<PathId, PredicateId>, 8>> neighbors;

    /**
     * PredicateId -> {EdgeId, the predicate index in the edge}
     *
     * Indexed by PredicateId, so its size is the number of edges (predicates) in this graph.
     */
    std::vector<std::pair<EdgeId, uint16_t>> predicates;
};
|
||||
|
||||
/**
 * The result of searching a graph of predicates for cycles.
 */
struct JoinGraphCycles {
    /**
     * One bitset per cycle. Each bit corresponds to a predicate (the bit position is the
     * PredicateId); a set bit indicates that the predicate forms part of the cycle.
     */
    std::vector<Bitset> cycles;

    /**
     * PredicateId -> {EdgeId, the predicate index in the edge}
     *
     * Allows translating the bits of 'cycles' back to the predicates they stand for.
     */
    std::vector<std::pair<EdgeId, uint16_t>> predicates;
};
|
||||
|
||||
/**
 * Finds the cycles of the undirected graph described by 'adjList'. Every distinct cycle is
 * returned once, as a bitset of the predicates (edges) which form it; the order of the returned
 * cycles is unspecified. The PredicateId -> {EdgeId, predicate index} mapping of 'adjList' is
 * handed over to the result.
 */
JoinGraphCycles findCycles(AdjacencyList adjList);
|
||||
|
||||
/**
|
||||
* GraphCycleBreaker is supposed to be created once for a Join Graph and then called for each
|
||||
* subgraph to break its cycles.
|
||||
|
|
|
|||
|
|
@ -32,9 +32,38 @@
|
|||
|
||||
#include "mongo/db/query/compiler/optimizer/join/unit_test_helpers.h"
|
||||
#include "mongo/unittest/unittest.h"
|
||||
|
||||
#include "mongo/util/assert_util.h"
|
||||
|
||||
namespace mongo::join_ordering {
|
||||
namespace {
|
||||
/**
|
||||
* Return the number of cycles in a clique of 'numNodes'.
|
||||
*/
|
||||
constexpr size_t cyclesInClique(size_t numNodes) {
|
||||
switch (numNodes) {
|
||||
case 0:
|
||||
case 1:
|
||||
case 2:
|
||||
return 0;
|
||||
case 3:
|
||||
return 1;
|
||||
case 4:
|
||||
return 7;
|
||||
case 5:
|
||||
return 37;
|
||||
case 6:
|
||||
return 197;
|
||||
case 7:
|
||||
return 1172;
|
||||
case 8:
|
||||
return 8018;
|
||||
case 9:
|
||||
return 62814;
|
||||
default:
|
||||
MONGO_UNREACHABLE_TASSERT(11509310);
|
||||
}
|
||||
}
|
||||
|
||||
class GraphCycleBreakerTest : public unittest::Test {
|
||||
public:
|
||||
GraphCycleBreakerTest() : graph{}, breaker(graph) {
|
||||
|
|
@ -114,4 +143,175 @@ TEST_F(GraphCycleBreakerTest, DisconnectedGraphWithCycles) {
|
|||
// Two edges are expected to be removed.
|
||||
ASSERT_EQ(newEdges.size(), 4);
|
||||
}
|
||||
|
||||
// ***************************************************
|
||||
// findCycles tests
|
||||
|
||||
class FindCyclesTest : public unittest::Test {
|
||||
public:
|
||||
/**
|
||||
* Append a clique of size 'end' - 'begin' nodes, starting with node 'begin' to 'edges'.
|
||||
*/
|
||||
static void appendClique(PathId begin,
|
||||
PathId end,
|
||||
std::vector<std::pair<PathId, PathId>>& edges) {
|
||||
for (PathId left = begin; left != end; ++left) {
|
||||
for (PathId right = left + 1; right != end; ++right) {
|
||||
edges.emplace_back(left, right);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static AdjacencyList makeAdjacencyList(const std::vector<std::pair<PathId, PathId>>& edges) {
|
||||
const auto maxPathId = [&edges]() {
|
||||
PathId maxPathId = 0;
|
||||
for (auto [left, right] : edges) {
|
||||
maxPathId = std::max({maxPathId, left, right});
|
||||
}
|
||||
return maxPathId;
|
||||
}();
|
||||
|
||||
AdjacencyList adjList;
|
||||
adjList.neighbors.resize(maxPathId + 1);
|
||||
|
||||
EdgeId edgeId = 0;
|
||||
for (auto [left, right] : edges) {
|
||||
const PredicateId predicateId = static_cast<PredicateId>(adjList.predicates.size());
|
||||
adjList.neighbors[left].emplace_back(right, predicateId);
|
||||
adjList.neighbors[right].emplace_back(left, predicateId);
|
||||
adjList.predicates.emplace_back(edgeId++, 0);
|
||||
}
|
||||
return adjList;
|
||||
}
|
||||
|
||||
void testFindCycles(AdjacencyList adjList, std::vector<Bitset> expected) {
|
||||
auto actual = findCycles(std::move(adjList));
|
||||
std::sort(actual.cycles.begin(), actual.cycles.end());
|
||||
std::sort(expected.begin(), expected.end());
|
||||
ASSERT_EQ(actual.cycles, expected);
|
||||
}
|
||||
|
||||
void testFindCycles(AdjacencyList adjList, size_t expectedNumberOfCycles) {
|
||||
auto actual = findCycles(std::move(adjList));
|
||||
ASSERT_EQ(actual.cycles.size(), expectedNumberOfCycles);
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(FindCyclesTest, GraphWithComplexCycles) {
    // Edges by predicate id: 0: A - B, 1: B - C, 2: C - D, 3: D - A, 4: C - A
    const std::vector<std::pair<PathId, PathId>> edges{{0, 1}, {1, 2}, {2, 3}, {3, 0}, {2, 0}};

    // In the bitset literals below the rightmost character stands for predicate 0.
    testFindCycles(makeAdjacencyList(edges),
                   std::vector<Bitset>{
                       Bitset{"01111"},  // A - B - C - D - A
                       Bitset{"10011"},  // A - B - C - A
                       Bitset{"11100"},  // A - C - D - A
                   });
}
|
||||
|
||||
TEST_F(FindCyclesTest, NoCycles) {
    // A single chain: A - B - C.
    // NOTE(review): the bare '{}' most likely selects the size_t overload of testFindCycles (an
    // expected count of 0) rather than an empty std::vector<Bitset> - confirm. Either way the
    // test asserts that no cycle is found.
    testFindCycles(makeAdjacencyList({{0, 1}, {1, 2}}), {});
}
|
||||
|
||||
TEST_F(FindCyclesTest, DisconnectedGraph) {
    // Two separate components without cycles: A - B and C - D.
    // NOTE(review): the bare '{}' most likely selects the size_t overload of testFindCycles (an
    // expected count of 0) rather than an empty std::vector<Bitset> - confirm. Either way the
    // test asserts that no cycle is found.
    testFindCycles(makeAdjacencyList({{0, 1}, {2, 3}}), {});
}
|
||||
|
||||
TEST_F(FindCyclesTest, DisconnectedGraphWithCycles) {
    const std::vector<std::pair<PathId, PathId>> edges{
        // First component, predicates 0-2: A - B - C - A
        {0, 1},
        {1, 2},
        {2, 0},
        // Second component, predicates 3-6: D - E - F - G - D
        {3, 4},
        {4, 5},
        {5, 6},
        {6, 3},
    };

    // Exactly one cycle per component.
    testFindCycles(makeAdjacencyList(edges),
                   std::vector<Bitset>{Bitset{"0000111"}, Bitset{"1111000"}});
}
|
||||
|
||||
/**
|
||||
* A clique and some cycle that includes members of the clique.
|
||||
*/
|
||||
TEST_F(FindCyclesTest, CliqueOf4AndCycle) {
    enum Nodes { a, b, c, d, e, f };

    // Edges by predicate id: 0: AB, 1: BC, 2: CD, 3: AD, 4: AC, 5: BD, 6: ED, 7: EF, 8: CF
    // A, B, C, D form a clique; D - E - F - C is an extra path between two nodes of the clique.
    auto adjList =
        makeAdjacencyList({{a, b}, {b, c}, {c, d}, {a, d}, {a, c}, {d, b}, {e, d}, {e, f}, {c, f}});

    std::vector<Bitset> expected{
        // The 7 cycles which stay inside the clique.
        Bitset{"000001111"},  // A - B - C - D - A
        Bitset{"000010011"},  // A - B - C - A
        Bitset{"000011100"},  // A - C - D - A
        Bitset{"000100110"},  // B - C - D - B
        Bitset{"000101001"},  // A - B - D - A
        Bitset{"000110101"},  // A - B - D - C - A
        Bitset{"000111010"},  // A - D - B - C - A
        // The 5 cycles which go through E and F.
        Bitset{"111110001"},  // A - B - D - E - F - C - A
        Bitset{"111001011"},  // A - D - E - F - C - B - A
        Bitset{"111011000"},  // A - D - E - F - C - A
        Bitset{"111100010"},  // B - D - E - F - C - B
        Bitset{"111000100"},  // C - D - E - F - C
    };

    testFindCycles(std::move(adjList), std::move(expected));
}
|
||||
|
||||
|
||||
TEST_F(FindCyclesTest, Cliques) {
    // For cliques of 3 to 7 nodes only the number of found cycles is verified.
    for (PathId size = 3; size < 8; ++size) {
        std::vector<std::pair<PathId, PathId>> edges;
        appendClique(0, size, edges);

        testFindCycles(makeAdjacencyList(edges), cyclesInClique(size));
    }
}
|
||||
|
||||
/**
|
||||
* Multiple cliques in a graph plus chains of nodes which do not participate in cycles, yet make the
|
||||
* cycle search harder.
|
||||
*/
|
||||
TEST_F(FindCyclesTest, MultipleCliques) {
    std::vector<std::pair<PathId, PathId>> edges;
    // The first node id which has not been handed out yet.
    PathId nextPathId = 0;
    // Sum of the cycle counts of all cliques added so far.
    size_t expectedNumberOfCycles = 0;

    // Adds a clique over the next 'numNodes' unused nodes.
    auto addClique = [&](auto numNodes) {
        appendClique(nextPathId, nextPathId + numNodes, edges);
        expectedNumberOfCycles += cyclesInClique(numNodes);
        nextPathId += numNodes;
    };

    // Adds 'numEdges' consecutive edges n - (n + 1), starting at the next node. The node the
    // chain ends with stays the next node, so whatever is added right after is attached to it.
    auto addChain = [&](auto numEdges) {
        for (auto remaining = numEdges; remaining > 0; --remaining) {
            const PathId from = nextPathId++;
            edges.emplace_back(from, nextPathId);
        }
    };

    addChain(5);
    addClique(3);
    nextPathId += 2;  // skip some nodes
    addClique(5);
    addChain(3);
    addClique(3);
    addClique(4);
    addChain(7);

    testFindCycles(makeAdjacencyList(edges), expectedNumberOfCycles);
}
|
||||
} // namespace
|
||||
} // namespace mongo::join_ordering
|
||||
|
|
|
|||
|
|
@ -49,6 +49,10 @@ using EdgeId = uint16_t;
|
|||
*/
|
||||
using PathId = uint16_t;
|
||||
|
||||
/** Join Predicate's unique identifier.
|
||||
*/
|
||||
using PredicateId = uint16_t;
|
||||
|
||||
struct ResolvedPath {
|
||||
NodeId nodeId;
|
||||
FieldPath fieldName;
|
||||
|
|
|
|||
Loading…
Reference in New Issue