SERVER-114243 Speed up SBE walkField by checking field name before callback (#44368)

Co-authored-by: Parker Felix <parker.felix@mongodb.com>
GitOrigin-RevId: 265f0a3f8128627798d5a8bd6ac12a80f2ba84a3
This commit is contained in:
Evan Bergeron 2025-11-25 12:16:12 -05:00 committed by MongoDB Bot
parent 5470ded800
commit d03764b57c
4 changed files with 70 additions and 38 deletions

View File

@ -0,0 +1,13 @@
/**
 * When run in SBE, extract_field_paths iterates over the Object
 * typeTag for `m` to calculate `$m.m1`. Tests that doing so does not
 * trip a tassert and that the aggregation returns the expected result.
 */
const coll = db.c;

// Start from an empty collection: the assertion below compares the full result set, so
// documents or indexes left behind by another test would cause a spurious failure.
coll.drop();

assert.commandWorked(coll.createIndex({"m.m1": 1}));
assert.commandWorked(
    coll.insertOne({
        "m": {"m1": NumberInt(0), "m2": NumberInt(0)},
    }),
);

// Sort on the indexed dotted path, then take the minimum of that same path across all documents.
const pipeline = [{"$sort": {"m.m1": 1}}, {"$group": {"_id": null, "a": {"$min": "$m.m1"}}}];
assert.eq(coll.aggregate(pipeline).toArray(), [{"_id": null, "a": NumberInt(0)}]);

View File

@ -132,8 +132,15 @@ PlanState ExtractFieldPathsStage::getNext() {
if (_root->inputAccessor) {
// Should only be used for unit tests.
auto [inputTag, inputVal] = _root->inputAccessor->getViewOfValue();
value::walkObj<value::ScalarProjectionPositionInfoRecorder>(
_root.get(), inputTag, inputVal, value::bitcastTo<const char*>(inputVal), walk);
if (value::TypeTags::bsonObject == inputTag) {
value::walkBsonObj<value::ScalarProjectionPositionInfoRecorder>(
_root.get(), inputVal, value::bitcastTo<const char*>(inputVal), walk);
} else if (value::TypeTags::Object == inputTag) {
value::walkObject<value::ScalarProjectionPositionInfoRecorder>(
_root.get(), inputVal, walk);
}
} else {
// Important this is only for toplevel fields. For nested fields, we would need knowledge of
// arrayness. We would also need to check for input accessors during the tree traversal.

View File

@ -142,8 +142,7 @@ std::vector<std::unique_ptr<CellBlock>> BSONExtractorImpl::extractFromBsons(
rec.newDoc();
}
walkObj<BlockProjectionPositionInfoRecorder>(&_root,
TypeTags::bsonObject,
walkBsonObj<BlockProjectionPositionInfoRecorder>(&_root,
bitcastFrom<const char*>(obj.objdata()),
obj.objdata(),
visitElementExtractorCallback);
@ -221,8 +220,7 @@ std::vector<const char*> extractValuePointersFromBson(BSONObj& obj,
}
};
walkObj<BlockProjectionPositionInfoRecorder>(extractor.getRoot(),
TypeTags::bsonObject,
walkBsonObj<BlockProjectionPositionInfoRecorder>(extractor.getRoot(),
bitcastFrom<const char*>(obj.objdata()),
obj.objdata(),
recordValuePointer);

View File

@ -257,27 +257,42 @@ void walkField(ObjectWalkNode<ProjectionRecorder>* node,
template <class ProjectionRecorder, class Cb>
requires std::invocable<Cb&, ObjectWalkNode<ProjectionRecorder>*, TypeTags, Value, const char*>
void walkObj(ObjectWalkNode<ProjectionRecorder>* node,
value::TypeTags inputTag,
void walkBsonObj(ObjectWalkNode<ProjectionRecorder>* node,
value::Value inputVal,
const char* bsonPtr,
const Cb& cb) {
size_t numChildrenWalked = 0;
auto callback = [&](StringData currFieldName,
value::TypeTags tag,
value::Value val,
const char* cur) -> bool {
if (numChildrenWalked >= node->getChildren.size()) {
// Early exit because we've walked every child for this node.
return true;
}
if (auto it = node->getChildren.find(currFieldName); it != node->getChildren.end()) {
walkField<ProjectionRecorder, Cb>(it->second.get(), tag, val, cur, cb);
auto bson = value::getRawPointerView(inputVal);
const auto end = bson::bsonEnd(bson);
// Skip document length.
const char* be = bson + 4;
while (numChildrenWalked < node->getChildren.size() && be != end - 1) {
auto fieldName = bson::fieldNameAndLength(be);
if (auto it = node->getChildren.find(fieldName); it != node->getChildren.end()) {
auto [eltTag, eltVal] = bson::convertFrom<true>(be, end, fieldName.size());
walkField<ProjectionRecorder>(it->second.get(), eltTag, eltVal, be, cb);
numChildrenWalked++;
}
return false;
};
value::objectForEach(inputTag, inputVal, callback);
be = bson::advance(be, fieldName.size());
}
}
/**
 * Walks the fields of the object view obtained from 'inputVal' and, for every field whose name
 * matches one of 'node's children, calls walkField() on that child with the field's tag and value.
 *
 * Iteration stops once every field has been visited or once as many fields have matched as
 * 'node' has children, whichever happens first. No raw BSON pointer is available on this path,
 * so walkField() is always given a null 'bsonPtr'.
 */
template <class ProjectionRecorder, class Cb>
void walkObject(ObjectWalkNode<ProjectionRecorder>* node, value::Value inputVal, const Cb& cb) {
    auto objView = getObjectView(inputVal);
    size_t matchedChildren = 0;
    for (size_t idx = 0; matchedChildren < node->getChildren.size() && idx < objView->size();
         ++idx) {
        auto childIt = node->getChildren.find(objView->field(idx));
        if (childIt == node->getChildren.end()) {
            // This field is not requested by any child of 'node'; skip it.
            continue;
        }

        auto [fieldTag, fieldVal] = objView->getAt(idx);
        walkField<ProjectionRecorder>(
            childIt->second.get(), fieldTag, fieldVal, nullptr /*bsonPtr*/, cb);
        ++matchedChildren;
    }
}
template <class ProjectionRecorder, class Cb>
@ -287,13 +302,12 @@ void walkField(ObjectWalkNode<ProjectionRecorder>* node,
Value eltVal,
const char* bsonPtr,
const Cb& cb) {
if (value::isObject(eltTag)) {
walkObj<ProjectionRecorder, Cb>(node, eltTag, eltVal, bsonPtr, cb);
if (node->traverseChild) {
walkField<ProjectionRecorder, Cb>(
node->traverseChild.get(), eltTag, eltVal, bsonPtr, cb);
if (value::TypeTags::bsonObject == eltTag) {
walkBsonObj<ProjectionRecorder, Cb>(node, eltVal, bsonPtr, cb);
} else if (value::TypeTags::Object == eltTag) {
walkObject<ProjectionRecorder, Cb>(node, eltVal, cb);
}
} else if (value::isArray(eltTag)) {
if (value::isArray(eltTag)) {
if (node->traverseChild) {
// The projection traversal semantics are "special" in that the leaf must know
// when there is an array higher up in the tree.
@ -322,7 +336,7 @@ void walkField(ObjectWalkNode<ProjectionRecorder>* node,
}
}
} else if (node->traverseChild) {
// We didn't see an array, so we apply the node below the traverse to this scalar.
// We didn't see an array, so we apply the node below the traverse.
walkField<ProjectionRecorder>(node->traverseChild.get(), eltTag, eltVal, bsonPtr, cb);
}
// Some callbacks use the raw bson pointer, not just the tag and value.