SERVER-111976: Implement ArraynessTrie lookup logic (#43221)

GitOrigin-RevId: 18a7e6eafdfdc05b552b80fd7e6ba82e1b942986
This commit is contained in:
Naafiyan Ahmed 2025-10-31 15:33:07 -04:00 committed by MongoDB Bot
parent 6732021dce
commit ee19cd5693
3 changed files with 81 additions and 13 deletions

View File

@ -33,12 +33,30 @@ using namespace mongo::multikey_paths;
namespace mongo {
void PathArrayness::addPath(FieldPath path, MultikeyComponents multikeyPath) {
// Registers 'path' in the trie by forwarding it, together with its multikey components, to the
// root node's insertPath(). The trailing 0 is presumably the depth at which insertion starts --
// TODO confirm against TrieNode::insertPath.
void PathArrayness::addPath(const FieldPath& path, const MultikeyComponents& multikeyPath) {
_root.insertPath(path, multikeyPath, 0);
}
bool PathArrayness::isPathArray(FieldPath path) const {
return true;
// Public lookup entry point: answers the query by delegating to the root trie node's
// isPathArray(), which performs the actual walk over the path components.
bool PathArrayness::isPathArray(const FieldPath& path) const {
return _root.isPathArray(path);
}
/**
 * Walks the trie one path component at a time, starting at this node, and reports whether any
 * component of 'path' is an array.
 *
 * Returns true as soon as a visited child node is marked as an array. Also returns true when a
 * component has no corresponding child node: nothing is recorded for it, so it is conservatively
 * treated as an array. Otherwise returns the arrayness of the last node reached.
 */
bool PathArrayness::TrieNode::isPathArray(const FieldPath& path) const {
    const TrieNode* current = this;
    for (size_t depth = 0; depth < path.getPathLength(); ++depth) {
        // The child lookup below is keyed by an owned std::string built from the field name.
        const auto pathSegment = std::string(path.getFieldName(depth));
        // Hold the lookup result by value rather than binding a reference to the temporary
        // returned by find().
        const auto next = current->_children.find(pathSegment);
        if (next == current->_children.end()) {
            // Missing path, conservatively assume all components from this point on are arrays.
            return true;
        }
        current = &next->second;
        if (current->isArray()) {
            // An array anywhere along the path makes the whole path array-valued.
            return true;
        }
    }
    // Reached when every component was found and none was an array, or when the path has no
    // components at all; in both cases the answer is the arrayness of the node we stopped on.
    return current->isArray();
}
void PathArrayness::TrieNode::visualizeTrie(std::string fieldName, int depth) const {

View File

@ -33,7 +33,6 @@
#include "mongo/db/query/compiler/metadata/index_entry.h"
namespace mongo {
/**
* Data structure representing arrayness of field paths.
*/
@ -49,12 +48,13 @@ public:
/**
* Insert a path into the trie.
*/
void addPath(FieldPath path, MultikeyComponents multikeyPath);
void addPath(const FieldPath& path, const MultikeyComponents& multikeyPath);
/**
* Given a path return whether it is an array.
* Given a path return whether any component of it is an array.
* For field paths that are not included in any index, assumes that the path has an array.
*/
bool isPathArray(FieldPath path) const;
bool isPathArray(const FieldPath& path) const;
/**
* Debugging helper to visualize trie.
@ -92,6 +92,11 @@ private:
return _isArray;
}
/**
* Helper function to determine whether any component of a given path is an array.
*/
bool isPathArray(const FieldPath& path) const;
/**
* Debugging helper to visualize trie.
*/

View File

@ -35,8 +35,7 @@
namespace mongo {
TEST(ArraynessTrie, InsertIntoTrie) {
TEST(ArraynessTrie, BuildAndLookupExistingFields) {
// Array: ["a"]
FieldPath field_A("a");
MultikeyComponents multikeyPaths_A{0U};
@ -61,13 +60,19 @@ TEST(ArraynessTrie, InsertIntoTrie) {
FieldPath field_BDE("b.d.e");
MultikeyComponents multikeyPaths_BDE{};
std::vector<FieldPath> fields{field_A, field_ABC, field_ABD, field_ABCJ, field_ABDE, field_BDE};
// Array: ["b.d.e.f"] (Only final component is array)
FieldPath field_BDEF("b.d.e.f");
MultikeyComponents multikeyPaths_BDEF{3U};
std::vector<FieldPath> fields{
field_A, field_ABC, field_ABD, field_ABCJ, field_ABDE, field_BDE, field_BDEF};
std::vector<MultikeyComponents> multikeyness{multikeyPaths_A,
multikeyPaths_ABC,
multikeyPaths_ABD,
multikeyPaths_ABCJ,
multikeyPaths_ABDE,
multikeyPaths_BDE};
multikeyPaths_BDE,
multikeyPaths_BDEF};
PathArrayness pathArrayness;
@ -75,8 +80,48 @@ TEST(ArraynessTrie, InsertIntoTrie) {
pathArrayness.addPath(fields[i], multikeyness[i]);
}
pathArrayness.visualizeTrie();
ASSERT_EQ(pathArrayness.isPathArray(field_A), true);
ASSERT_EQ(pathArrayness.isPathArray(field_ABC), true);
ASSERT_EQ(pathArrayness.isPathArray(field_ABD), true);
ASSERT_EQ(pathArrayness.isPathArray(field_ABCJ), true);
ASSERT_EQ(pathArrayness.isPathArray(field_ABDE), true);
ASSERT_EQ(pathArrayness.isPathArray(field_BDE), false);
ASSERT_EQ(pathArrayness.isPathArray(field_BDEF), true);
}
TEST(ArraynessTrie, BuildAndLookupNonExistingFields) {
    // The only indexed path is "a.b.c", and all three of its components are multikey:
    // Array: ["a", "a.b", "a.b.c"]
    const FieldPath indexedPath("a.b.c");
    const MultikeyComponents indexedMultikey{0U, 1U, 2U};

    // "a.b.c.d" is deliberately never inserted into the trie.
    const FieldPath unindexedPath("a.b.c.d");

    PathArrayness pathArrayness;
    pathArrayness.addPath(indexedPath, indexedMultikey);

    ASSERT_EQ(pathArrayness.isPathArray(indexedPath), true);

    // Component "d" is unknown to the trie, but its prefixes "a", "a.b" and "a.b.c" are arrays,
    // so the lookup must still report an array.
    ASSERT_EQ(pathArrayness.isPathArray(unindexedPath), true);
}
TEST(ArraynessTrie, LookupEmptyTrie) {
    // Nothing is inserted into the trie in this test; lookups run against a
    // default-constructed PathArrayness.
    FieldPath field_A("a");
    FieldPath field_ABCD("a.b.c.d");

    PathArrayness pathArrayness;

    // Neither of these fields or their prefixes are in the trie, so assume arrays.
    ASSERT_EQ(pathArrayness.isPathArray(field_A), true);
    ASSERT_EQ(pathArrayness.isPathArray(field_ABCD), true);
}
} // namespace mongo