SERVER-95687 Improve test coverage in the query subsystem for embedded null bytes (#31572)

GitOrigin-RevId: e2a76ee90404d0a37a81ccfd02ffcf47edcddef7
This commit is contained in:
Daniel Segel 2025-06-27 10:38:48 -04:00 committed by MongoDB Bot
parent 1db9c4e429
commit f5d3ae4871
4 changed files with 446 additions and 2 deletions

View File

@ -389,6 +389,8 @@ last-continuous:
ticket: SERVER-92193
- test_file: jstests/core/query/find_all_values_at_path_expression.js
ticket: SERVER-95670
- test_file: jstests/core/timeseries/timeseries_explicit_unpack_bucket.js
ticket: SERVER-95687
- test_file: jstests/change_streams/update_lookup_before_ddl.js
ticket: SERVER-95976
- test_file: jstests/change_streams/lookup_post_image.js
@ -844,6 +846,8 @@ last-lts:
ticket: SERVER-92193
- test_file: jstests/core/query/find_all_values_at_path_expression.js
ticket: SERVER-95670
- test_file: jstests/core/timeseries/timeseries_explicit_unpack_bucket.js
ticket: SERVER-95687
- test_file: jstests/change_streams/update_lookup_before_ddl.js
ticket: SERVER-95976
- test_file: jstests/change_streams/lookup_post_image.js

View File

@ -0,0 +1,338 @@
/**
 * Tests how various aggregation expressions and stages that take strings and parameters respond to
 * string input containing null bytes.
 */
const coll = db[jsTestName()];
coll.drop();
// Seed a single document so that pipelines which do reach the server have data to run against.
assert.commandWorked(coll.insert({_id: 1, foo: 1}));
// Strings containing an embedded null byte in various positions; each is exercised below both as a
// literal value and as (part of) a field path.
const nullByteStrs = [
    // Starts with null chars.
    "\x00a",
    // Ends with null chars.
    "a\x00",
    // All null chars.
    "\x00",
    "\x00\x00\x00",
    // Null chars somewhere in the middle.
    "a\x00\x01\x08a",
    "a\x00\x02\x08b",
    "a\x00\x01\x18b",
    "a\x00\x01\x28c",
    "a\x00\x01\x03d\x00\xff\xff\xff\xff\x00\x08b",
];
// Returns the two plain-string variants tested for a given string: the raw string itself and the
// same string appended to a dotted prefix.
function getStringUses(str) {
    const dotted = "foo." + str;
    return [str, dotted];
}
// Returns the two field-path variants tested for a given string: a top-level path reference and a
// dotted sub-path reference.
function getFieldUses(str) {
    return ["$" + str, "$foo." + str];
}
// Returns every tested use of the string: both plain-string variants followed by both field-path
// variants.
function getAllUses(str) {
    return getStringUses(str).concat(getFieldUses(str));
}
// Confirm that the JavaScript engine in the shell fails to construct the JS object because
// 'JavaScript property (name) contains a null char which is not allowed in BSON'.
//
// Returns pipelines in which 'nullStr' is used as an object key; the shell rejects each one with
// error 16985 before it is ever sent to the server.
function getShellErrorPipelines(nullStr) {
    return [
        // '$documents' and '$match' must be separate stages: a pipeline stage specification
        // object may contain exactly one field. (The shell throws 16985 before the server
        // would see the stage, but the pipeline should still be well-formed.)
        [{$documents: [{[nullStr]: "foo"}]}, {$match: {}}],
        [{$facet: {[nullStr]: [{$match: {}}]}}],
        [{$fill: {output: {[nullStr]: {value: "foo"}}}}],
        [{$fill: {sortBy: {[nullStr]: 1}, output: {[nullStr]: {value: "foo"}}}}],
        [{$group: {_id: "$foo", [nullStr]: {$sum: "$bar"}}}],
        [{$match: {[nullStr]: "foo"}}],
        [{$match: {$or: [{"foo": "bar"}, {[nullStr]: "baz"}]}}],
        [{
            $match:
                {$jsonSchema: {required: ["foo"], properties: {[nullStr]: {bsonType: "string"}}}}
        }],
        [{$merge: {into: "coll", on: "_id", let : {[nullStr]: "$foo"}}}],
        [{$project: {[nullStr]: 1}}],
        [{$project: {result: {$let: {vars: {[nullStr]: "$foo"}, in : "$$nullStr"}}}}],
        [{$replaceRoot: {newRoot: {[nullStr]: "$foo"}}}],
        [{$replaceWith: {[nullStr]: "$foo"}}],
        [{$set: {[nullStr]: "$foo"}}],
        [{$setWindowFields: {sortBy: {[nullStr]: 1}, output: {count: {$sum: 1}}}}],
        [{$setWindowFields: {output: {[nullStr]: {count: {$sum: 1}}}}}],
        [{$sort: {[nullStr]: 1}}],
        [{$unset: {[nullStr]: ""}}]
    ];
}
// Every use (literal or field path) of each null-byte string must make the shell itself refuse to
// build the pipeline object with error 16985.
nullByteStrs.forEach(function(nullStr) {
    getAllUses(nullStr).forEach(function(str) {
        getShellErrorPipelines(str).forEach(function(pipeline) {
            assert.throwsWithCode(() => coll.aggregate(pipeline), 16985);
        });
    });
});
// Certain expressions and stages are valid when passed a literal string that contains a null byte,
// but are invalid when the string is a reference to a field name.
//
// Returns, in order: stage pipelines, comparison-expression pipelines, then expression pipelines.
function getFieldPathErrorPipelines(nullStr) {
    // Stages that accept 'nullStr' directly as a value or expression.
    const stagePipelines = [
        [{$addFields: {field: nullStr}}],
        [{$addFields: {hashedVal: {$toHashedIndexKey: nullStr}}}],
        [{$set: {field: nullStr}}],
        [{$group: {_id: nullStr}}],
        [{$bucket: {groupBy: "$foo", boundaries: [nullStr, nullStr + "1"], default: "Other"}}],
        [{$bucket: {groupBy: "$foo", boundaries: [0, 5, 10], default: nullStr}}],
        [{
            $fill:
                {partitionBy: {bar: nullStr}, sortBy: {foo: 1}, output: {out: {method: "linear"}}}
        }],
        [{$setWindowFields: {partitionBy: nullStr, output: {count: {$sum: 1}}}}],
    ];

    // Comparison operators all take the same ["foo", nullStr] operand shape except $in, whose
    // right-hand operand is an array. Each comparison is wrapped in $match + $expr.
    const comparisonExprs =
        ["$eq", "$ne", "$gt", "$gte", "$lt", "$lte"].map(op => ({[op]: ["foo", nullStr]}));
    comparisonExprs.push({$in: ["foo", [nullStr]]});
    const comparisonPipelines = comparisonExprs.map(expr => [{$match: {$expr: {field: expr}}}]);

    // String/accumulator expressions, each wrapped in a $project.
    const exprs = [
        {$concat: [nullStr, "foo"]},
        {$ltrim: {input: nullStr}},
        {$max: ["foo", nullStr]},
        {$min: ["foo", nullStr]},
        {$rtrim: {input: nullStr}},
        {$substr: [nullStr, 0, 1]},
        {$substrBytes: [nullStr, 0, 1]},
        {$substrCP: [nullStr, 0, 1]},
        {$strcasecmp: [nullStr, "foo"]},
        {$trim: {input: nullStr}},
        {$toLower: nullStr},
        {$toString: nullStr},
        {$toUpper: nullStr},
        {$reduce: {input: [nullStr], initialValue: "", in : ""}},
        {$reduce: {input: ["foo"], initialValue: nullStr, in : ""}},
        {$reduce: {input: ["foo"], initialValue: "", in : nullStr}},
        {$regexMatch: {input: nullStr, regex: "foo"}},
        {$getField: nullStr},
    ];
    const exprPipelines = exprs.map(operator => [{$project: {field: operator}}]);

    return stagePipelines.concat(comparisonPipelines, exprPipelines);
}
// Confirm the behavior for all the pipelines that should succeed with null-byte literal strings and
// fail with field path expressions containing a null byte.
for (const nullStr of nullByteStrs) {
    // Literal-string uses are legal: the server accepts the aggregate command.
    getStringUses(nullStr).forEach(str => {
        getFieldPathErrorPipelines(str).forEach(pipeline => {
            assert.commandWorked(coll.runCommand('aggregate', {pipeline: pipeline, cursor: {}}));
        });
    });
    // When there is an embedded null byte in a field path, we expect error code 16411 in
    // particular.
    getFieldUses(nullStr).forEach(field => {
        getFieldPathErrorPipelines(field).forEach(pipeline => {
            assert.throwsWithCode(() => coll.aggregate(pipeline), [16411, 9423101]);
        });
    });
}
// Return expressions that should always fail when passed a string (literal or field name)
// containing a null byte.
//
// Each entry pairs a pipeline with the set of acceptable error codes; a pipeline may fail with
// different codes depending on whether 'nullStr' is a literal, a field path, or a dotted path.
function getErrorPipelines(nullStr) {
    return [
        // $bucket / $bucketAuto reject a non-field-path groupBy (40202/40239) or the null byte
        // in the field path itself (16411).
        {
            pipeline: [{$bucket: {groupBy: nullStr, boundaries: [0, 5, 10], default: "Other"}}],
            codes: [40202, 16411]
        },
        {
            pipeline: [{$bucketAuto: {groupBy: nullStr, buckets: 5, output: {count: {$sum: 1}}}}],
            codes: [40239, 16411]
        },
        // $changeStream options are validated as fixed-choice strings.
        {pipeline: [{$changeStream: {fullDocument: nullStr}}], codes: [ErrorCodes.BadValue]},
        {
            pipeline: [{$changeStream: {fullDocumentBeforeChange: nullStr}}],
            codes: [ErrorCodes.BadValue]
        },
        {pipeline: [{$count: nullStr}], codes: [40159, 40158]},
        // $densify: 'field', 'bounds', and 'partitionByFields' each validate separately.
        {
            pipeline: [{$densify: {field: nullStr, range: {step: 1, bounds: "full"}}}],
            codes: [16411, 16410]
        },
        {
            pipeline: [{$densify: {field: "foo", range: {step: 1, bounds: nullStr}}}],
            codes: [5946802]
        },
        {
            pipeline: [{
                $densify: {
                    field: "foo",
                    partitionByFields: [nullStr],
                    range: {step: 1, bounds: "full"}
                }
            }],
            codes: [16411, 16410, 8993000]
        },
        {
            pipeline: [{$fill: {partitionByFields: [nullStr], output: {foo: {value: "bar"}}}}],
            codes: [9527900]
        },
        // $geoNear output field paths.
        {
            pipeline:
                [{$geoNear: {near: {type: "Point", coordinates: [0, 0]}, distanceField: nullStr}}],
            codes: [16411, 16410]
        },
        {
            pipeline: [{
                $geoNear: {
                    near: {type: "Point", coordinates: [0, 0]},
                    distanceField: "foo",
                    includeLocs: nullStr
                }
            }],
            codes: [16411, 16410]
        },
        // $graphLookup: 'from' is validated as a namespace; the remaining string parameters are
        // parsed as field paths.
        {
            pipeline: [{
                $graphLookup: {
                    from: nullStr,
                    startWith: "$foo",
                    connectFromField: "parentId",
                    connectToField: "_id",
                    as: "results"
                }
            }],
            codes: [ErrorCodes.InvalidNamespace]
        },
        {
            pipeline: [{
                $graphLookup: {
                    from: "coll",
                    startWith: "$foo",
                    connectFromField: nullStr,
                    connectToField: "_id",
                    as: "results"
                }
            }],
            codes: [16411, 16410]
        },
        {
            pipeline: [{
                $graphLookup: {
                    from: "coll",
                    startWith: "$foo",
                    connectFromField: "parentId",
                    connectToField: nullStr,
                    as: "results"
                }
            }],
            codes: [16411, 16410]
        },
        {
            pipeline: [{
                $graphLookup: {
                    from: "coll",
                    startWith: "$foo",
                    connectFromField: "parentId",
                    connectToField: "_id",
                    as: nullStr
                }
            }],
            codes: [16411, 16410]
        },
        {
            pipeline: [{
                $graphLookup: {
                    from: "coll",
                    startWith: "$foo",
                    connectFromField: "parentId",
                    connectToField: "_id",
                    as: "results",
                    depthField: nullStr
                }
            }],
            codes: [16411, 16410]
        },
        // $lookup: same split — 'from' is a namespace, the rest are field paths.
        {
            pipeline: [{
                $lookup: {
                    from: nullStr,
                    localField: "local",
                    foreignField: "foreign",
                    as: "result"
                }
            }],
            codes: [ErrorCodes.InvalidNamespace]
        },
        {
            pipeline: [{
                $lookup: {
                    from: "foo",
                    localField: nullStr,
                    foreignField: "foreign",
                    as: "result"
                }
            }],
            codes: [16411, 16410]
        },
        {
            pipeline: [{
                $lookup: {
                    from: "foo",
                    localField: "local",
                    foreignField: nullStr,
                    as: "result"
                }
            }],
            codes: [16411, 16410]
        },
        {
            pipeline: [{
                $lookup: {
                    from: "foo",
                    localField: "local",
                    foreignField: "foreign",
                    as: nullStr
                }
            }],
            codes: [16411, 16410]
        },
        // $merge / $out / $unionWith namespace and field-path parameters.
        {pipeline: [{$merge: {into: nullStr}}], codes: [ErrorCodes.InvalidNamespace]},
        {pipeline: [{$merge: {into: "coll", on: nullStr}}], codes: [16411, 16410]},
        {pipeline: [{$out: {db: nullStr, coll: "coll"}}], codes: [ErrorCodes.InvalidNamespace]},
        {pipeline: [{$out: {db: "db", coll: nullStr}}], codes: [ErrorCodes.InvalidNamespace]},
        // Expressions whose 'field' argument is a field name.
        {
            pipeline:
                [{$project: {field: {$setField: {field: nullStr, input: {}, value: "newField"}}}}],
            codes: [9534700, 16411]
        },
        {
            pipeline: [{$project: {field: {$unsetField: {field: nullStr, input: {}}}}}],
            codes: [9534700, 16411]
        },
        {
            pipeline: [{$project: {matches: {$regexMatch: {input: "$foo", regex: nullStr}}}}],
            codes: [51109, 16411]
        },
        {pipeline: [{$replaceRoot: {newRoot: nullStr}}], codes: [40228, 16411, 8105800]},
        {pipeline: [{$replaceWith: nullStr}], codes: [40228, 16411, 8105800]},
        {pipeline: [{$sortByCount: nullStr}], codes: [40148, 16411]},
        {pipeline: [{$unionWith: {coll: nullStr}}], codes: [ErrorCodes.InvalidNamespace]},
        {pipeline: [{$unwind: {path: nullStr}}], codes: [28818, 16419]},
        {pipeline: [{$unwind: {path: "$foo", includeArrayIndex: nullStr}}], codes: [16411, 28822]},
    ];
}
// Confirm the "error pipelines" always throw an exception.
for (const nullStr of nullByteStrs) {
    for (const strOrField of getAllUses(nullStr)) {
        getErrorPipelines(strOrField).forEach(({pipeline, codes}) => {
            // 9423101 is additionally acceptable for every pipeline.
            assert.throwsWithCode(() => coll.aggregate(pipeline), codes.concat([9423101]));
        });
    }
}

View File

@ -155,4 +155,80 @@ assert.commandFailedWithCode(
assert.commandFailedWithCode(
assert.throws(() => sysCollWithMeta.aggregate([{$_unpackBucket: {timeField: "start"}}])),
5369601);
// Collection creation fails if 'timeField' or 'metaField' contains embedded null bytes.
const tsCollNullByte = testDB.getCollection("tsCollNullByte");
assert.commandFailedWithCode(
    assert.throws(() => assert.commandWorked(testDB.createCollection(
        tsCollNullByte.getName(),
        {timeseries: {timeField: "a\x00b", metaField: "tags"}}))),
    ErrorCodes.BadValue);
assert.commandFailedWithCode(
    assert.throws(() => assert.commandWorked(testDB.createCollection(
        tsCollNullByte.getName(),
        {timeseries: {timeField: "time", metaField: "a\x00b"}}))),
    ErrorCodes.BadValue);
// $_unpackBucket fails if timeField or metaField contains null bytes (codes 9568703 and 9568704
// respectively).
assert.commandFailedWithCode(
    assert.throws(() => sysCollWithMeta.aggregate(
        [{$_unpackBucket: {timeField: "a\x00b", metaField: "tags"}}])),
    9568703);
assert.commandFailedWithCode(
    assert.throws(() => sysCollWithMeta.aggregate(
        [{$_unpackBucket: {timeField: "time", metaField: "a\x00b"}}])),
    9568704);
// $_internalUnpackBucket fails if timeField or metaField contains null bytes (codes 9568701 and
// 9568702 respectively).
assert.commandFailedWithCode(assert.throws(() => sysCollWithMeta.aggregate([{
    $_internalUnpackBucket: {
        timeField: "a\x00b",
        metaField: "tags",
        bucketMaxSpanSeconds: NumberInt(3600)
    }
}])),
                             9568701);
assert.commandFailedWithCode(assert.throws(() => sysCollWithMeta.aggregate([{
    $_internalUnpackBucket: {
        timeField: "time",
        metaField: "a\x00b",
        bucketMaxSpanSeconds: NumberInt(3600)
    }
}])),
                             9568702);
// $_internalUnpackBucket fails if include or exclude contains null bytes (both share code
// 9568705).
// "include"
assert.commandFailedWithCode(assert.throws(() => sysCollWithMeta.aggregate([{
    $_internalUnpackBucket: {
        include: ["start", "invalid_\x00"],
        timeField: "start",
        metaField: "tags",
        bucketMaxSpanSeconds: NumberInt(3600)
    }
}])),
                             9568705);
// "exclude"
assert.commandFailedWithCode(assert.throws(() => sysCollWithMeta.aggregate([{
    $_internalUnpackBucket: {
        exclude: ["start", "value_\x00"],
        timeField: "start",
        metaField: "tags",
        bucketMaxSpanSeconds: NumberInt(3600)
    }
}])),
                             9568705);
// $_internalUnpackBucket fails if computedMetaProjFields contains null bytes (code 9568706).
assert.commandFailedWithCode(assert.throws(() => sysCollWithMeta.aggregate([{
    $_internalUnpackBucket: {
        timeField: "time",
        metaField: "tags",
        computedMetaProjFields: ["invalid_\x00_field"]
    }
}])),
                             9568706);
})();

View File

@ -324,6 +324,10 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
uassert(5346503,
"include or exclude field element must be a single-element field path",
field.find('.') == std::string::npos);
// TODO SERVER-98589: Remove when BSON field name type is implemented.
uassert(9568705,
"include or exclude field element must not contain an embedded null byte",
field.find('\0') == std::string::npos);
bucketSpec.addIncludeExcludeField(field);
}
bucketSpec.setBehavior(fieldName == kInclude ? BucketSpec::Behavior::kInclude
@ -338,7 +342,12 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
uassert(5346504,
str::stream() << "timeField field must be a string, got: " << elem.type(),
elem.type() == BSONType::String);
bucketSpec.setTimeField(elem.str());
auto timeField = elem.str();
// TODO SERVER-98589: Remove when BSON field name type is implemented.
uassert(9568701,
str::stream() << "timeField must not contain an embedded null byte",
timeField.find('\0') == std::string::npos);
bucketSpec.setTimeField(std::move(timeField));
hasTimeField = true;
} else if (fieldName == timeseries::kMetaFieldName) {
uassert(5346505,
@ -348,6 +357,10 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
uassert(5545700,
str::stream() << "metaField field must be a single-element field path",
metaField.find('.') == std::string::npos);
// TODO SERVER-98589: Remove when BSON field name type is implemented.
uassert(9568702,
str::stream() << "metaField field must not contain an embedded null byte",
metaField.find('\0') == std::string::npos);
bucketSpec.setMetaField(std::move(metaField));
} else if (fieldName == kBucketMaxSpanSeconds) {
uassert(5510600,
@ -375,6 +388,10 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
uassert(5509902,
"computedMetaProjFields field element must be a single-element field path",
field.find('.') == std::string::npos);
// TODO SERVER-98589: Remove when BSON field name type is implemented.
uassert(9568706,
"computedMetaProjFields field must not contain an embedded null byte",
field.find('\0') == std::string::npos);
bucketSpec.addComputedMetaProjFields(field);
}
} else if (fieldName == kIncludeMinTimeAsMetadata) {
@ -443,10 +460,15 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
auto fieldName = elem.fieldNameStringData();
// We only expose "timeField" and "metaField" as parameters in $_unpackBucket.
if (fieldName == timeseries::kTimeFieldName) {
// NOTE(review): 'elem.str()' is extracted before the 5612401 type check below, whereas the
// $_internalUnpackBucket parser above extracts the string only after its type uassert.
// Presumably str() yields "" for non-string elements so this is benign, but moving the
// extraction below the uassert would match the sibling parser — TODO confirm.
auto timeField = elem.str();
uassert(5612401,
        str::stream() << "timeField field must be a string, got: " << elem.type(),
        elem.type() == BSONType::String);
// TODO SERVER-98589: Remove when BSON field name type is implemented.
uassert(9568703,
        str::stream() << "timeField must not contain an embedded null byte",
        timeField.find('\0') == std::string::npos);
bucketSpec.setTimeField(std::move(timeField));
hasTimeField = true;
} else if (fieldName == timeseries::kMetaFieldName) {
uassert(5612402,
@ -456,6 +478,10 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceInternalUnpackBucket::createF
uassert(5612403,
str::stream() << "metaField field must be a single-element field path",
metaField.find('.') == std::string::npos);
// TODO SERVER-98589: Remove when BSON field name type is implemented.
uassert(9568704,
str::stream() << "metaField field must not contain an embedded null byte",
metaField.find('\0') == std::string::npos);
bucketSpec.setMetaField(std::move(metaField));
} else if (fieldName == kAssumeNoMixedSchemaData) {
uassert(6067203,