SERVER-114243 Speed up SBE walkField by checking field name before callback (#44368)

Co-authored-by: Parker Felix <parker.felix@mongodb.com>
GitOrigin-RevId: 265f0a3f8128627798d5a8bd6ac12a80f2ba84a3
This commit is contained in:
Evan Bergeron 2025-11-25 12:16:12 -05:00 committed by MongoDB Bot
parent 5470ded800
commit d03764b57c
4 changed files with 70 additions and 38 deletions

View File

@ -0,0 +1,13 @@
/**
* Regression test for SBE's extract_field_paths stage.
*
* When run in SBE, extract_field_paths iterates over the Object
* typeTag for `m` to calculate `$m.m1`. This test checks that the
* aggregation below completes and returns the expected result
* instead of failing with a tassert.
*
* NOTE(review): collection `c` is not dropped before use, so the final
* assertion assumes it holds no other documents - confirm the test
* runner starts from a clean database.
*/
// Index on the dotted path that the pipeline sorts by.
// NOTE(review): presumably needed so the plan reaches the Object-tag code path - confirm.
assert.commandWorked(db.c.createIndex({"m.m1": 1}));
// Insert a single document whose subdocument `m` has two fields; only `m1` is read by the pipeline.
assert.commandWorked(
db.c.insertOne({
"m": {"m1": NumberInt(0), "m2": NumberInt(0)},
}),
);
// Sort by `m.m1`, then collapse everything into one group taking the minimum of `m.m1`.
// With the single document inserted above, the expected minimum is 0.
assert.eq(db.c.aggregate([{"$sort": {"m.m1": 1}}, {"$group": {"_id": null, "a": {"$min": "$m.m1"}}}]).toArray(), [
{"_id": null, "a": NumberInt(0)},
]);

View File

@ -132,8 +132,15 @@ PlanState ExtractFieldPathsStage::getNext() {
if (_root->inputAccessor) { if (_root->inputAccessor) {
// Should only be used for unit tests. // Should only be used for unit tests.
auto [inputTag, inputVal] = _root->inputAccessor->getViewOfValue(); auto [inputTag, inputVal] = _root->inputAccessor->getViewOfValue();
value::walkObj<value::ScalarProjectionPositionInfoRecorder>(
_root.get(), inputTag, inputVal, value::bitcastTo<const char*>(inputVal), walk); if (value::TypeTags::bsonObject == inputTag) {
value::walkBsonObj<value::ScalarProjectionPositionInfoRecorder>(
_root.get(), inputVal, value::bitcastTo<const char*>(inputVal), walk);
} else if (value::TypeTags::Object == inputTag) {
value::walkObject<value::ScalarProjectionPositionInfoRecorder>(
_root.get(), inputVal, walk);
}
} else { } else {
// Important this is only for toplevel fields. For nested fields, we would need knowledge of // Important this is only for toplevel fields. For nested fields, we would need knowledge of
// arrayness. We would also need to check for input accessors during the tree traversal. // arrayness. We would also need to check for input accessors during the tree traversal.

View File

@ -142,8 +142,7 @@ std::vector<std::unique_ptr<CellBlock>> BSONExtractorImpl::extractFromBsons(
rec.newDoc(); rec.newDoc();
} }
walkObj<BlockProjectionPositionInfoRecorder>(&_root, walkBsonObj<BlockProjectionPositionInfoRecorder>(&_root,
TypeTags::bsonObject,
bitcastFrom<const char*>(obj.objdata()), bitcastFrom<const char*>(obj.objdata()),
obj.objdata(), obj.objdata(),
visitElementExtractorCallback); visitElementExtractorCallback);
@ -221,8 +220,7 @@ std::vector<const char*> extractValuePointersFromBson(BSONObj& obj,
} }
}; };
walkObj<BlockProjectionPositionInfoRecorder>(extractor.getRoot(), walkBsonObj<BlockProjectionPositionInfoRecorder>(extractor.getRoot(),
TypeTags::bsonObject,
bitcastFrom<const char*>(obj.objdata()), bitcastFrom<const char*>(obj.objdata()),
obj.objdata(), obj.objdata(),
recordValuePointer); recordValuePointer);

View File

@ -257,27 +257,42 @@ void walkField(ObjectWalkNode<ProjectionRecorder>* node,
template <class ProjectionRecorder, class Cb> template <class ProjectionRecorder, class Cb>
requires std::invocable<Cb&, ObjectWalkNode<ProjectionRecorder>*, TypeTags, Value, const char*> requires std::invocable<Cb&, ObjectWalkNode<ProjectionRecorder>*, TypeTags, Value, const char*>
void walkObj(ObjectWalkNode<ProjectionRecorder>* node, void walkBsonObj(ObjectWalkNode<ProjectionRecorder>* node,
value::TypeTags inputTag,
value::Value inputVal, value::Value inputVal,
const char* bsonPtr, const char* bsonPtr,
const Cb& cb) { const Cb& cb) {
size_t numChildrenWalked = 0; size_t numChildrenWalked = 0;
auto callback = [&](StringData currFieldName, auto bson = value::getRawPointerView(inputVal);
value::TypeTags tag, const auto end = bson::bsonEnd(bson);
value::Value val,
const char* cur) -> bool { // Skip document length.
if (numChildrenWalked >= node->getChildren.size()) { const char* be = bson + 4;
// Early exit because we've walked every child for this node. while (numChildrenWalked < node->getChildren.size() && be != end - 1) {
return true; auto fieldName = bson::fieldNameAndLength(be);
} if (auto it = node->getChildren.find(fieldName); it != node->getChildren.end()) {
if (auto it = node->getChildren.find(currFieldName); it != node->getChildren.end()) { auto [eltTag, eltVal] = bson::convertFrom<true>(be, end, fieldName.size());
walkField<ProjectionRecorder, Cb>(it->second.get(), tag, val, cur, cb); walkField<ProjectionRecorder>(it->second.get(), eltTag, eltVal, be, cb);
numChildrenWalked++; numChildrenWalked++;
} }
return false; be = bson::advance(be, fieldName.size());
}; }
value::objectForEach(inputTag, inputVal, callback); }
template <class ProjectionRecorder, class Cb>
void walkObject(ObjectWalkNode<ProjectionRecorder>* node, value::Value inputVal, const Cb& cb) {
size_t numChildrenWalked = 0;
auto obj = getObjectView(inputVal);
size_t i = 0;
while (numChildrenWalked < node->getChildren.size() && i < obj->size()) {
if (auto it = node->getChildren.find(obj->field(i)); it != node->getChildren.end()) {
auto [eltTag, eltVal] = obj->getAt(i);
walkField<ProjectionRecorder>(
it->second.get(), eltTag, eltVal, nullptr /*bsonPtr*/, cb);
numChildrenWalked++;
}
i++;
}
} }
template <class ProjectionRecorder, class Cb> template <class ProjectionRecorder, class Cb>
@ -287,13 +302,12 @@ void walkField(ObjectWalkNode<ProjectionRecorder>* node,
Value eltVal, Value eltVal,
const char* bsonPtr, const char* bsonPtr,
const Cb& cb) { const Cb& cb) {
if (value::isObject(eltTag)) { if (value::TypeTags::bsonObject == eltTag) {
walkObj<ProjectionRecorder, Cb>(node, eltTag, eltVal, bsonPtr, cb); walkBsonObj<ProjectionRecorder, Cb>(node, eltVal, bsonPtr, cb);
if (node->traverseChild) { } else if (value::TypeTags::Object == eltTag) {
walkField<ProjectionRecorder, Cb>( walkObject<ProjectionRecorder, Cb>(node, eltVal, cb);
node->traverseChild.get(), eltTag, eltVal, bsonPtr, cb);
} }
} else if (value::isArray(eltTag)) { if (value::isArray(eltTag)) {
if (node->traverseChild) { if (node->traverseChild) {
// The projection traversal semantics are "special" in that the leaf must know // The projection traversal semantics are "special" in that the leaf must know
// when there is an array higher up in the tree. // when there is an array higher up in the tree.
@ -322,7 +336,7 @@ void walkField(ObjectWalkNode<ProjectionRecorder>* node,
} }
} }
} else if (node->traverseChild) { } else if (node->traverseChild) {
// We didn't see an array, so we apply the node below the traverse to this scalar. // We didn't see an array, so we apply the node below the traverse.
walkField<ProjectionRecorder>(node->traverseChild.get(), eltTag, eltVal, bsonPtr, cb); walkField<ProjectionRecorder>(node->traverseChild.get(), eltTag, eltVal, bsonPtr, cb);
} }
// Some callbacks use the raw bson pointer, not just the tag and value. // Some callbacks use the raw bson pointer, not just the tag and value.