mirror of https://github.com/mongodb/mongo
SERVER-111976: Implement ArraynessTrie lookup logic (#43221)
GitOrigin-RevId: 18a7e6eafdfdc05b552b80fd7e6ba82e1b942986
This commit is contained in:
parent
6732021dce
commit
ee19cd5693
|
|
@ -33,12 +33,30 @@ using namespace mongo::multikey_paths;
|
|||
|
||||
namespace mongo {
|
||||
|
||||
void PathArrayness::addPath(FieldPath path, MultikeyComponents multikeyPath) {
|
||||
// Inserts 'path' together with its multikey components into the trie by forwarding to the root
// node's insertPath(). The trailing 0 is presumably the depth at which insertion starts -- TODO
// confirm against TrieNode::insertPath.
void PathArrayness::addPath(const FieldPath& path, const MultikeyComponents& multikeyPath) {
|
||||
_root.insertPath(path, multikeyPath, 0);
|
||||
}
|
||||
|
||||
bool PathArrayness::isPathArray(FieldPath path) const {
|
||||
return true;
|
||||
// Answers the lookup by delegating to the root trie node, which reports whether any component of
// 'path' is an array.
bool PathArrayness::isPathArray(const FieldPath& path) const {
|
||||
return _root.isPathArray(path);
|
||||
}
|
||||
|
||||
bool PathArrayness::TrieNode::isPathArray(const FieldPath& path) const {
|
||||
const TrieNode* current = this;
|
||||
// Track the number of times we have seen an array prefix.
|
||||
for (size_t depth = 0; depth < path.getPathLength(); ++depth) {
|
||||
const auto pathSegment = std::string(path.getFieldName(depth));
|
||||
const auto& next = current->_children.find(pathSegment);
|
||||
if (next == current->_children.end()) {
|
||||
// Missing path, conservatively assume all components from this point on are arrays.
|
||||
return true;
|
||||
}
|
||||
current = &next->second;
|
||||
if (current->isArray()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return current->isArray();
|
||||
}
|
||||
|
||||
void PathArrayness::TrieNode::visualizeTrie(std::string fieldName, int depth) const {
|
||||
|
|
|
|||
|
|
@ -33,7 +33,6 @@
|
|||
#include "mongo/db/query/compiler/metadata/index_entry.h"
|
||||
|
||||
namespace mongo {
|
||||
|
||||
/**
|
||||
* Data structure representing arrayness of field paths.
|
||||
*/
|
||||
|
|
@ -49,12 +48,13 @@ public:
|
|||
/**
|
||||
* Insert a path into the trie.
|
||||
*/
|
||||
void addPath(FieldPath path, MultikeyComponents multikeyPath);
|
||||
void addPath(const FieldPath& path, const MultikeyComponents& multikeyPath);
|
||||
|
||||
/**
|
||||
* Given a path return whether it is an array.
|
||||
* Given a path return whether any component of it is an array.
|
||||
* For field paths that are not included in any index, assumes that the path has an array.
|
||||
*/
|
||||
bool isPathArray(FieldPath path) const;
|
||||
bool isPathArray(const FieldPath& path) const;
|
||||
|
||||
/**
|
||||
* Debugging helper to visualize trie.
|
||||
|
|
@ -92,6 +92,11 @@ private:
|
|||
return _isArray;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to determine whether any component of a given path is an array.
|
||||
*/
|
||||
bool isPathArray(const FieldPath& path) const;
|
||||
|
||||
/**
|
||||
* Debugging helper to visualize trie.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -35,8 +35,7 @@
|
|||
|
||||
namespace mongo {
|
||||
|
||||
TEST(ArraynessTrie, InsertIntoTrie) {
|
||||
|
||||
TEST(ArraynessTrie, BuildAndLookupExistingFields) {
|
||||
// Array: ["a"]
|
||||
FieldPath field_A("a");
|
||||
MultikeyComponents multikeyPaths_A{0U};
|
||||
|
|
@ -61,13 +60,19 @@ TEST(ArraynessTrie, InsertIntoTrie) {
|
|||
FieldPath field_BDE("b.d.e");
|
||||
MultikeyComponents multikeyPaths_BDE{};
|
||||
|
||||
std::vector<FieldPath> fields{field_A, field_ABC, field_ABD, field_ABCJ, field_ABDE, field_BDE};
|
||||
// Array: ["b.d.e.f"] (Only final component is array)
|
||||
FieldPath field_BDEF("b.d.e.f");
|
||||
MultikeyComponents multikeyPaths_BDEF{3U};
|
||||
|
||||
std::vector<FieldPath> fields{
|
||||
field_A, field_ABC, field_ABD, field_ABCJ, field_ABDE, field_BDE, field_BDEF};
|
||||
std::vector<MultikeyComponents> multikeyness{multikeyPaths_A,
|
||||
multikeyPaths_ABC,
|
||||
multikeyPaths_ABD,
|
||||
multikeyPaths_ABCJ,
|
||||
multikeyPaths_ABDE,
|
||||
multikeyPaths_BDE};
|
||||
multikeyPaths_BDE,
|
||||
multikeyPaths_BDEF};
|
||||
|
||||
PathArrayness pathArrayness;
|
||||
|
||||
|
|
@ -75,8 +80,48 @@ TEST(ArraynessTrie, InsertIntoTrie) {
|
|||
pathArrayness.addPath(fields[i], multikeyness[i]);
|
||||
}
|
||||
|
||||
pathArrayness.visualizeTrie();
|
||||
|
||||
ASSERT_EQ(pathArrayness.isPathArray(field_A), true);
|
||||
ASSERT_EQ(pathArrayness.isPathArray(field_ABC), true);
|
||||
ASSERT_EQ(pathArrayness.isPathArray(field_ABD), true);
|
||||
ASSERT_EQ(pathArrayness.isPathArray(field_ABCJ), true);
|
||||
ASSERT_EQ(pathArrayness.isPathArray(field_ABDE), true);
|
||||
ASSERT_EQ(pathArrayness.isPathArray(field_BDE), false);
|
||||
ASSERT_EQ(pathArrayness.isPathArray(field_BDEF), true);
|
||||
}
|
||||
|
||||
TEST(ArraynessTrie, BuildAndLookupNonExistingFields) {
    // The only path added to the trie is "a.b.c"; "a", "a.b" and "a.b.c" are all arrays.
    FieldPath insertedPath("a.b.c");
    MultikeyComponents insertedMultikeyness{0U, 1U, 2U};

    // "a.b.c.d" is deliberately never added to the trie.
    FieldPath missingPath("a.b.c.d");

    PathArrayness trie;
    trie.addPath(insertedPath, insertedMultikeyness);

    ASSERT_EQ(trie.isPathArray(insertedPath), true);

    // The component "d" is unknown to the trie, but its prefixes "a", "a.b" and "a.b.c" are arrays.
    ASSERT_EQ(trie.isPathArray(missingPath), true);
}
|
||||
|
||||
TEST(ArraynessTrie, LookupEmptyTrie) {
    // No fields are inserted into the trie; these paths are used for lookups only.
    FieldPath field_A("a");
    FieldPath field_ABCD("a.b.c.d");

    PathArrayness pathArrayness;

    // Neither these fields nor any of their prefixes are in the trie, so the lookup must
    // conservatively report them as arrays.
    ASSERT_EQ(pathArrayness.isPathArray(field_A), true);
    ASSERT_EQ(pathArrayness.isPathArray(field_ABCD), true);
}
|
||||
} // namespace mongo
|
||||
|
|
|
|||
Loading…
Reference in New Issue