mirror of https://github.com/mongodb/mongo
SERVER-114243 Speed up SBE walkField by checking field name before callback (#44368)
Co-authored-by: Parker Felix <parker.felix@mongodb.com> GitOrigin-RevId: 265f0a3f8128627798d5a8bd6ac12a80f2ba84a3
This commit is contained in:
parent
5470ded800
commit
d03764b57c
|
|
@ -0,0 +1,13 @@
|
||||||
|
/**
|
||||||
|
* When run in SBE, extract_field_paths iterates over the Object
|
||||||
|
* typeTag for `m` to calculate `$m.m1`. Verifies that the query does not trip a tassert.
|
||||||
|
*/
|
||||||
|
assert.commandWorked(db.c.createIndex({"m.m1": 1}));
|
||||||
|
assert.commandWorked(
|
||||||
|
db.c.insertOne({
|
||||||
|
"m": {"m1": NumberInt(0), "m2": NumberInt(0)},
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
assert.eq(db.c.aggregate([{"$sort": {"m.m1": 1}}, {"$group": {"_id": null, "a": {"$min": "$m.m1"}}}]).toArray(), [
|
||||||
|
{"_id": null, "a": NumberInt(0)},
|
||||||
|
]);
|
||||||
|
|
@ -132,8 +132,15 @@ PlanState ExtractFieldPathsStage::getNext() {
|
||||||
if (_root->inputAccessor) {
|
if (_root->inputAccessor) {
|
||||||
// Should only be used for unit tests.
|
// Should only be used for unit tests.
|
||||||
auto [inputTag, inputVal] = _root->inputAccessor->getViewOfValue();
|
auto [inputTag, inputVal] = _root->inputAccessor->getViewOfValue();
|
||||||
value::walkObj<value::ScalarProjectionPositionInfoRecorder>(
|
|
||||||
_root.get(), inputTag, inputVal, value::bitcastTo<const char*>(inputVal), walk);
|
if (value::TypeTags::bsonObject == inputTag) {
|
||||||
|
value::walkBsonObj<value::ScalarProjectionPositionInfoRecorder>(
|
||||||
|
_root.get(), inputVal, value::bitcastTo<const char*>(inputVal), walk);
|
||||||
|
} else if (value::TypeTags::Object == inputTag) {
|
||||||
|
value::walkObject<value::ScalarProjectionPositionInfoRecorder>(
|
||||||
|
_root.get(), inputVal, walk);
|
||||||
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// Important: this is only for top-level fields. For nested fields, we would need knowledge of
|
// Important: this is only for top-level fields. For nested fields, we would need knowledge of
|
||||||
// arrayness. We would also need to check for input accessors during the tree traversal.
|
// arrayness. We would also need to check for input accessors during the tree traversal.
|
||||||
|
|
|
||||||
|
|
@ -142,8 +142,7 @@ std::vector<std::unique_ptr<CellBlock>> BSONExtractorImpl::extractFromBsons(
|
||||||
rec.newDoc();
|
rec.newDoc();
|
||||||
}
|
}
|
||||||
|
|
||||||
walkObj<BlockProjectionPositionInfoRecorder>(&_root,
|
walkBsonObj<BlockProjectionPositionInfoRecorder>(&_root,
|
||||||
TypeTags::bsonObject,
|
|
||||||
bitcastFrom<const char*>(obj.objdata()),
|
bitcastFrom<const char*>(obj.objdata()),
|
||||||
obj.objdata(),
|
obj.objdata(),
|
||||||
visitElementExtractorCallback);
|
visitElementExtractorCallback);
|
||||||
|
|
@ -221,8 +220,7 @@ std::vector<const char*> extractValuePointersFromBson(BSONObj& obj,
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
walkObj<BlockProjectionPositionInfoRecorder>(extractor.getRoot(),
|
walkBsonObj<BlockProjectionPositionInfoRecorder>(extractor.getRoot(),
|
||||||
TypeTags::bsonObject,
|
|
||||||
bitcastFrom<const char*>(obj.objdata()),
|
bitcastFrom<const char*>(obj.objdata()),
|
||||||
obj.objdata(),
|
obj.objdata(),
|
||||||
recordValuePointer);
|
recordValuePointer);
|
||||||
|
|
|
||||||
|
|
@ -257,27 +257,42 @@ void walkField(ObjectWalkNode<ProjectionRecorder>* node,
|
||||||
|
|
||||||
template <class ProjectionRecorder, class Cb>
|
template <class ProjectionRecorder, class Cb>
|
||||||
requires std::invocable<Cb&, ObjectWalkNode<ProjectionRecorder>*, TypeTags, Value, const char*>
|
requires std::invocable<Cb&, ObjectWalkNode<ProjectionRecorder>*, TypeTags, Value, const char*>
|
||||||
void walkObj(ObjectWalkNode<ProjectionRecorder>* node,
|
void walkBsonObj(ObjectWalkNode<ProjectionRecorder>* node,
|
||||||
value::TypeTags inputTag,
|
|
||||||
value::Value inputVal,
|
value::Value inputVal,
|
||||||
const char* bsonPtr,
|
const char* bsonPtr,
|
||||||
const Cb& cb) {
|
const Cb& cb) {
|
||||||
size_t numChildrenWalked = 0;
|
size_t numChildrenWalked = 0;
|
||||||
auto callback = [&](StringData currFieldName,
|
auto bson = value::getRawPointerView(inputVal);
|
||||||
value::TypeTags tag,
|
const auto end = bson::bsonEnd(bson);
|
||||||
value::Value val,
|
|
||||||
const char* cur) -> bool {
|
// Skip document length.
|
||||||
if (numChildrenWalked >= node->getChildren.size()) {
|
const char* be = bson + 4;
|
||||||
// Early exit because we've walked every child for this node.
|
while (numChildrenWalked < node->getChildren.size() && be != end - 1) {
|
||||||
return true;
|
auto fieldName = bson::fieldNameAndLength(be);
|
||||||
}
|
if (auto it = node->getChildren.find(fieldName); it != node->getChildren.end()) {
|
||||||
if (auto it = node->getChildren.find(currFieldName); it != node->getChildren.end()) {
|
auto [eltTag, eltVal] = bson::convertFrom<true>(be, end, fieldName.size());
|
||||||
walkField<ProjectionRecorder, Cb>(it->second.get(), tag, val, cur, cb);
|
walkField<ProjectionRecorder>(it->second.get(), eltTag, eltVal, be, cb);
|
||||||
numChildrenWalked++;
|
numChildrenWalked++;
|
||||||
}
|
}
|
||||||
return false;
|
be = bson::advance(be, fieldName.size());
|
||||||
};
|
}
|
||||||
value::objectForEach(inputTag, inputVal, callback);
|
}
|
||||||
|
|
||||||
|
template <class ProjectionRecorder, class Cb>
|
||||||
|
void walkObject(ObjectWalkNode<ProjectionRecorder>* node, value::Value inputVal, const Cb& cb) {
|
||||||
|
size_t numChildrenWalked = 0;
|
||||||
|
auto obj = getObjectView(inputVal);
|
||||||
|
|
||||||
|
size_t i = 0;
|
||||||
|
while (numChildrenWalked < node->getChildren.size() && i < obj->size()) {
|
||||||
|
if (auto it = node->getChildren.find(obj->field(i)); it != node->getChildren.end()) {
|
||||||
|
auto [eltTag, eltVal] = obj->getAt(i);
|
||||||
|
walkField<ProjectionRecorder>(
|
||||||
|
it->second.get(), eltTag, eltVal, nullptr /*bsonPtr*/, cb);
|
||||||
|
numChildrenWalked++;
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class ProjectionRecorder, class Cb>
|
template <class ProjectionRecorder, class Cb>
|
||||||
|
|
@ -287,13 +302,12 @@ void walkField(ObjectWalkNode<ProjectionRecorder>* node,
|
||||||
Value eltVal,
|
Value eltVal,
|
||||||
const char* bsonPtr,
|
const char* bsonPtr,
|
||||||
const Cb& cb) {
|
const Cb& cb) {
|
||||||
if (value::isObject(eltTag)) {
|
if (value::TypeTags::bsonObject == eltTag) {
|
||||||
walkObj<ProjectionRecorder, Cb>(node, eltTag, eltVal, bsonPtr, cb);
|
walkBsonObj<ProjectionRecorder, Cb>(node, eltVal, bsonPtr, cb);
|
||||||
if (node->traverseChild) {
|
} else if (value::TypeTags::Object == eltTag) {
|
||||||
walkField<ProjectionRecorder, Cb>(
|
walkObject<ProjectionRecorder, Cb>(node, eltVal, cb);
|
||||||
node->traverseChild.get(), eltTag, eltVal, bsonPtr, cb);
|
|
||||||
}
|
}
|
||||||
} else if (value::isArray(eltTag)) {
|
if (value::isArray(eltTag)) {
|
||||||
if (node->traverseChild) {
|
if (node->traverseChild) {
|
||||||
// The projection traversal semantics are "special" in that the leaf must know
|
// The projection traversal semantics are "special" in that the leaf must know
|
||||||
// when there is an array higher up in the tree.
|
// when there is an array higher up in the tree.
|
||||||
|
|
@ -322,7 +336,7 @@ void walkField(ObjectWalkNode<ProjectionRecorder>* node,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (node->traverseChild) {
|
} else if (node->traverseChild) {
|
||||||
// We didn't see an array, so we apply the node below the traverse to this scalar.
|
// We didn't see an array, so we apply the node below the traverse.
|
||||||
walkField<ProjectionRecorder>(node->traverseChild.get(), eltTag, eltVal, bsonPtr, cb);
|
walkField<ProjectionRecorder>(node->traverseChild.get(), eltTag, eltVal, bsonPtr, cb);
|
||||||
}
|
}
|
||||||
// Some callbacks use the raw bson pointer, not just the tag and value.
|
// Some callbacks use the raw bson pointer, not just the tag and value.
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue