diff --git a/src/mongo/db/query/compiler/metadata/path_arrayness.cpp b/src/mongo/db/query/compiler/metadata/path_arrayness.cpp index e6d7cdafa0d..f8438899187 100644 --- a/src/mongo/db/query/compiler/metadata/path_arrayness.cpp +++ b/src/mongo/db/query/compiler/metadata/path_arrayness.cpp @@ -33,12 +33,30 @@ using namespace mongo::multikey_paths; namespace mongo { -void PathArrayness::addPath(FieldPath path, MultikeyComponents multikeyPath) { +void PathArrayness::addPath(const FieldPath& path, const MultikeyComponents& multikeyPath) { _root.insertPath(path, multikeyPath, 0); } -bool PathArrayness::isPathArray(FieldPath path) const { - return true; +bool PathArrayness::isPathArray(const FieldPath& path) const { + return _root.isPathArray(path); +} + +bool PathArrayness::TrieNode::isPathArray(const FieldPath& path) const { + const TrieNode* current = this; + // Walk the trie one path component at a time; stop at the first array or missing component. + for (size_t depth = 0; depth < path.getPathLength(); ++depth) { + const auto pathSegment = std::string(path.getFieldName(depth)); + const auto& next = current->_children.find(pathSegment); + if (next == current->_children.end()) { + // Missing path, conservatively assume all components from this point on are arrays. + return true; + } + current = &next->second; + if (current->isArray()) { + return true; + } + } + return current->isArray(); } void PathArrayness::TrieNode::visualizeTrie(std::string fieldName, int depth) const { diff --git a/src/mongo/db/query/compiler/metadata/path_arrayness.h b/src/mongo/db/query/compiler/metadata/path_arrayness.h index 28c8d9d6732..27e80928f5b 100644 --- a/src/mongo/db/query/compiler/metadata/path_arrayness.h +++ b/src/mongo/db/query/compiler/metadata/path_arrayness.h @@ -33,7 +33,6 @@ #include "mongo/db/query/compiler/metadata/index_entry.h" namespace mongo { - /** * Data structure representing arrayness of field paths. */ @@ -49,12 +48,13 @@ public: /** * Insert a path into the trie. 
*/ - void addPath(FieldPath path, MultikeyComponents multikeyPath); + void addPath(const FieldPath& path, const MultikeyComponents& multikeyPath); /** - * Given a path return whether it is an array. + * Given a path, return whether any component of it is an array. + * Conservatively returns true when a component is missing from the trie (e.g. not indexed). */ - bool isPathArray(FieldPath path) const; + bool isPathArray(const FieldPath& path) const; /** * Debugging helper to visualize trie. @@ -92,6 +92,11 @@ private: return _isArray; } + /** + * Helper function to determine whether any component of a given path is an array. + */ + bool isPathArray(const FieldPath& path) const; + /** * Debugging helper to visualize trie. */ diff --git a/src/mongo/db/query/compiler/metadata/path_arrayness_test.cpp b/src/mongo/db/query/compiler/metadata/path_arrayness_test.cpp index 637a6d03dca..1e26ed0c016 100644 --- a/src/mongo/db/query/compiler/metadata/path_arrayness_test.cpp +++ b/src/mongo/db/query/compiler/metadata/path_arrayness_test.cpp @@ -35,8 +35,7 @@ namespace mongo { -TEST(ArraynessTrie, InsertIntoTrie) { - +TEST(ArraynessTrie, BuildAndLookupExistingFields) { // Array: ["a"] FieldPath field_A("a"); MultikeyComponents multikeyPaths_A{0U}; @@ -61,13 +60,19 @@ TEST(ArraynessTrie, InsertIntoTrie) { FieldPath field_BDE("b.d.e"); MultikeyComponents multikeyPaths_BDE{}; - std::vector fields{field_A, field_ABC, field_ABD, field_ABCJ, field_ABDE, field_BDE}; + // Array: ["b.d.e.f"] (Only final component is array) + FieldPath field_BDEF("b.d.e.f"); + MultikeyComponents multikeyPaths_BDEF{3U}; + + std::vector fields{ + field_A, field_ABC, field_ABD, field_ABCJ, field_ABDE, field_BDE, field_BDEF}; std::vector multikeyness{multikeyPaths_A, multikeyPaths_ABC, multikeyPaths_ABD, multikeyPaths_ABCJ, multikeyPaths_ABDE, - multikeyPaths_BDE}; + multikeyPaths_BDE, + multikeyPaths_BDEF}; PathArrayness pathArrayness; @@ -75,8 +80,48 @@ TEST(ArraynessTrie, InsertIntoTrie) { 
pathArrayness.addPath(fields[i], multikeyness[i]); } - pathArrayness.visualizeTrie(); - ASSERT_EQ(pathArrayness.isPathArray(field_A), true); + ASSERT_EQ(pathArrayness.isPathArray(field_ABC), true); + ASSERT_EQ(pathArrayness.isPathArray(field_ABD), true); + ASSERT_EQ(pathArrayness.isPathArray(field_ABCJ), true); + ASSERT_EQ(pathArrayness.isPathArray(field_ABDE), true); + ASSERT_EQ(pathArrayness.isPathArray(field_BDE), false); + ASSERT_EQ(pathArrayness.isPathArray(field_BDEF), true); +} + +TEST(ArraynessTrie, BuildAndLookupNonExistingFields) { + // Array: ["a", "a.b", "a.b.c"] + FieldPath field_ABC("a.b.c"); + MultikeyComponents multikeyPaths_ABC{0U, 1U, 2U}; + + // We will not insert "a.b.c.d" into the trie. + FieldPath field_ABCD("a.b.c.d"); + + std::vector fields{field_ABC}; + std::vector multikeyness{multikeyPaths_ABC}; + + PathArrayness pathArrayness; + + for (size_t i = 0; i < fields.size(); i++) { + pathArrayness.addPath(fields[i], multikeyness[i]); + } + + ASSERT_EQ(pathArrayness.isPathArray(field_ABC), true); + + // Path component "d" is not in the trie, but its prefixes "a", "a.b" and "a.b.c" are arrays. + ASSERT_EQ(pathArrayness.isPathArray(field_ABCD), true); +} + +TEST(ArraynessTrie, LookupEmptyTrie) { + // We will not insert any fields into the trie. + FieldPath field_A("a"); + FieldPath field_ABCD("a.b.c.d"); + std::vector fields{field_A, field_ABCD}; + + PathArrayness pathArrayness; + + // Neither of these fields nor any of their prefixes is in the trie, so both are assumed arrays. + ASSERT_EQ(pathArrayness.isPathArray(field_A), true); + ASSERT_EQ(pathArrayness.isPathArray(field_ABCD), true); } } // namespace mongo