SERVER-110841 Check validate command's hash prefixes inputs (#42125)

GitOrigin-RevId: 7d1841d377353e41f444a53b04dc327b27e49e86
This commit is contained in:
Yuhong Zhang 2025-10-06 16:30:57 -04:00 committed by MongoDB Bot
parent f6c0e13216
commit ef8d3dbd48
7 changed files with 158 additions and 11 deletions

View File

@ -26,12 +26,12 @@ assert.commandWorked(db2.coll.update({_id: differingDocId}, {$set: {a: "hello"}}
function hashDrillDown(db1, db2) { function hashDrillDown(db1, db2) {
// We start off by providing an empty string prefix, and from there we drill down // We start off by providing an empty string prefix, and from there we drill down
// into each bucket. // into each bucket.
let prefix = ""; let prefix = [];
let count = 2; // Start off with any value > 1. let count = 2; // Start off with any value > 1.
while (count > 1) { while (count > 1) {
jsTest.log.info(`Drilling down with prefix: '${prefix}'`); jsTest.log.info(`Drilling down with prefix: [${prefix}]`);
let partial1 = assert.commandWorked(db1.coll.validate({collHash: true, hashPrefixes: [prefix]})).partial; let partial1 = assert.commandWorked(db1.coll.validate({collHash: true, hashPrefixes: prefix})).partial;
let partial2 = assert.commandWorked(db2.coll.validate({collHash: true, hashPrefixes: [prefix]})).partial; let partial2 = assert.commandWorked(db2.coll.validate({collHash: true, hashPrefixes: prefix})).partial;
jsTest.log.info("Partial1: " + tojson(partial1)); jsTest.log.info("Partial1: " + tojson(partial1));
jsTest.log.info("Partial2: " + tojson(partial2)); jsTest.log.info("Partial2: " + tojson(partial2));
assert.eq(Object.keys(partial1).length, Object.keys(partial2).length); assert.eq(Object.keys(partial1).length, Object.keys(partial2).length);
@ -42,16 +42,16 @@ function hashDrillDown(db1, db2) {
} }
} }
assert.eq(differingBuckets.length, 1, tojson(differingBuckets)); assert.eq(differingBuckets.length, 1, tojson(differingBuckets));
prefix = differingBuckets[0]; prefix = differingBuckets;
count = partial1[differingBuckets[0]].count; count = partial1[differingBuckets[0]].count;
} }
jsTest.log.info("Prefix of differing bucket: " + tojson(prefix)); jsTest.log.info("Prefix of differing bucket: " + prefix);
// revealHashedIds with 'prefix' should return 'differingDocId'. // revealHashedIds with 'prefix' should return 'differingDocId'.
const res = assert.commandWorked(db1.coll.validate({collHash: true, revealHashedIds: [prefix]})); const res = assert.commandWorked(db1.coll.validate({collHash: true, revealHashedIds: prefix}));
assert.eq(res.revealedIds[prefix].length, 1, res); assert.eq(res.revealedIds[prefix[0]].length, 1, res);
assert.eq(res.revealedIds[prefix][0], {_id: differingDocId}, res); assert.eq(res.revealedIds[prefix[0]][0], {_id: differingDocId}, res);
} }
hashDrillDown(db1, db2); hashDrillDown(db1, db2);

View File

@ -49,11 +49,31 @@ testValidate({collHash: true, hashPrefixes: []});
assert.commandFailed(t.validate({hashPrefixes: ["aaa"]})); assert.commandFailed(t.validate({hashPrefixes: ["aaa"]}));
assert.commandFailed(t.validate({collHash: true, hashPrefixes: [""]}));
assert.commandFailed(t.validate({collHash: true, hashPrefixes: ["x".repeat(100)]}));
assert.commandFailed(t.validate({collHash: true, hashPrefixes: ["random string"]}));
assert.commandFailed(t.validate({collHash: true, hashPrefixes: ["aaa", "bb"]}));
assert.commandFailed(t.validate({collHash: true, hashPrefixes: ["aaa", "bbb", "aaa"]}));
// Test revealHashedIds // Test revealHashedIds
testValidate({collHash: true, revealHashedIds: ["aaa"]}); testValidate({collHash: true, revealHashedIds: ["aaa"]});
testValidate({collHash: true, revealHashedIds: ["aaa", "bb"]});
assert.commandFailed(t.validate({collHash: true, revealHashedIds: []})); assert.commandFailed(t.validate({collHash: true, revealHashedIds: []}));
assert.commandFailed(t.validate({revealHashedIds: ["aaa"]})); assert.commandFailed(t.validate({revealHashedIds: ["aaa"]}));
assert.commandFailed(t.validate({collHash: true, revealHashedIds: ["aaa"], hashPrefixes: ["aaa"]})); assert.commandFailed(t.validate({collHash: true, revealHashedIds: ["aaa"], hashPrefixes: ["aaa"]}));
assert.commandFailed(t.validate({collHash: true, revealHashedIds: [""]}));
assert.commandFailed(t.validate({collHash: true, revealHashedIds: ["x".repeat(100)]}));
assert.commandFailed(t.validate({collHash: true, revealHashedIds: ["random string"]}));
assert.commandFailed(t.validate({collHash: true, revealHashedIds: ["aaa", "a", "aaa"]}));

View File

@ -374,7 +374,6 @@ public:
// collHash parameter. // collHash parameter.
const bool collHash = cmdObj["collHash"].trueValue(); const bool collHash = cmdObj["collHash"].trueValue();
// TODO (SERVER-110841): Sanitize the parameters passed as hashPrefixes.
// hashPrefixes parameter. // hashPrefixes parameter.
const auto rawHashPrefixes = cmdObj["hashPrefixes"]; const auto rawHashPrefixes = cmdObj["hashPrefixes"];
boost::optional<std::vector<std::string>> hashPrefixes = boost::none; boost::optional<std::vector<std::string>> hashPrefixes = boost::none;
@ -383,6 +382,7 @@ public:
for (const auto& e : rawHashPrefixes.Array()) { for (const auto& e : rawHashPrefixes.Array()) {
hashPrefixes->push_back(e.String()); hashPrefixes->push_back(e.String());
} }
CollectionValidation::validateHashes(*hashPrefixes, /*equalLength=*/true);
if (!hashPrefixes->size()) { if (!hashPrefixes->size()) {
hashPrefixes->push_back(std::string("")); hashPrefixes->push_back(std::string(""));
} }
@ -399,7 +399,6 @@ public:
<< " requires {collHash: true}."); << " requires {collHash: true}.");
} }
// TODO (SERVER-110841): Sanitize prefixes in the revealHashedIds field.
// revealHashedIds parameter. // revealHashedIds parameter.
const auto rawRevealHashedIds = cmdObj["revealHashedIds"]; const auto rawRevealHashedIds = cmdObj["revealHashedIds"];
boost::optional<std::vector<std::string>> revealHashedIds = boost::none; boost::optional<std::vector<std::string>> revealHashedIds = boost::none;
@ -433,6 +432,7 @@ public:
for (const auto& e : rawRevealHashedIdsArr) { for (const auto& e : rawRevealHashedIdsArr) {
revealHashedIds->push_back(e.String()); revealHashedIds->push_back(e.String());
} }
CollectionValidation::validateHashes(*revealHashedIds, /*equalLength=*/false);
} }
auto validateMode = [&] { auto validateMode = [&] {

View File

@ -502,6 +502,53 @@ void _validateCatalogEntry(OperationContext* opCtx,
} // namespace } // namespace
/**
 * Sanity-checks a list of user-supplied hash prefixes and throws InvalidOptions (via uassert) on
 * the first violation found. An empty list is accepted as-is.
 *
 * Every prefix must:
 *   - be non-empty,
 *   - be no longer than the hex string produced by SHA256Block::toHexString(),
 *   - have the same length as the first prefix when 'equalLength' is true,
 *   - consist solely of hexadecimal digits (either case).
 *
 * After lower-casing, no prefix may be equal to, or a leading substring of, another prefix.
 */
void validateHashes(const std::vector<std::string>& hashPrefixes, bool equalLength) {
    if (hashPrefixes.empty()) {
        return;
    }

    const size_t kHashStringMaxLen = SHA256Block().toHexString().size();
    const auto expectedLength = hashPrefixes.front().length();

    std::vector<std::string> normalizedHashPrefixes;
    normalizedHashPrefixes.reserve(hashPrefixes.size());

    for (const auto& hashPrefix : hashPrefixes) {
        uassert(ErrorCodes::InvalidOptions,
                "Hash prefixes should not be empty strings.",
                !hashPrefix.empty());
        // Report only the length here: the offending string has not been bounded yet, so echoing
        // it verbatim could produce an arbitrarily large error message.
        uassert(ErrorCodes::InvalidOptions,
                fmt::format("Hash prefixes too long. Received a prefix of length {}, but the "
                            "maximum length is {}.",
                            hashPrefix.length(),
                            kHashStringMaxLen),
                hashPrefix.length() <= kHashStringMaxLen);
        uassert(ErrorCodes::InvalidOptions,
                "Hash prefixes should not have different lengths.",
                !equalLength || hashPrefix.length() == expectedLength);

        // Lower-case while validating so that the overlap check below is case-insensitive.
        std::string normalizedHashPrefix;
        normalizedHashPrefix.reserve(hashPrefix.length());
        for (char c : hashPrefix) {
            // Safe to echo: the length check above caps 'hashPrefix' at kHashStringMaxLen.
            uassert(ErrorCodes::InvalidOptions,
                    fmt::format("Hash prefixes should only contain hex strings. Received: {}.",
                                hashPrefix),
                    ctype::isXdigit(c));
            normalizedHashPrefix.push_back(ctype::toLower(c));
        }
        normalizedHashPrefixes.push_back(std::move(normalizedHashPrefix));
    }

    // In lexicographic order, a string that is a prefix of any other element is also a prefix of
    // its immediate successor, so comparing neighbours is enough to detect duplicates and
    // overlapping prefixes. starts_with() is already false when the candidate prefix is longer.
    std::sort(normalizedHashPrefixes.begin(), normalizedHashPrefixes.end());
    for (size_t i = 1; i < normalizedHashPrefixes.size(); ++i) {
        const auto& previousHashPrefix = normalizedHashPrefixes[i - 1];
        const auto& currentHashPrefix = normalizedHashPrefixes[i];
        uassert(ErrorCodes::InvalidOptions,
                fmt::format("Provided hash prefixes should not duplicate: {}, {}",
                            previousHashPrefix,
                            currentHashPrefix),
                !currentHashPrefix.starts_with(previousHashPrefix));
    }
}
Status validate(OperationContext* opCtx, Status validate(OperationContext* opCtx,
const NamespaceString& nss, const NamespaceString& nss,
ValidationOptions options, ValidationOptions options,

View File

@ -44,6 +44,12 @@ class ValidateResults;
namespace CollectionValidation { namespace CollectionValidation {
/**
 * Checks if 'hashPrefixes' contains valid hash strings. Throws a DBException with
 * ErrorCodes::InvalidOptions if any is invalid. An empty 'hashPrefixes' is accepted.
 *
 * A hash string is invalid if it is empty, is longer than the hex representation of a
 * SHA256Block, or contains a character that is not a hexadecimal digit (upper and lower case are
 * both allowed). The list as a whole is invalid if, ignoring case, one entry is equal to or is a
 * leading substring of another entry.
 *
 * When 'equalLength' is true, also checks all hash strings have the same length.
 */
void validateHashes(const std::vector<std::string>& hashPrefixes, bool equalLength);
/** /**
* Expects the caller to hold no locks. * Expects the caller to hold no locks.
* *

View File

@ -431,5 +431,79 @@ TEST_F(CollectionValidationTest, ValidateOldUniqueIndexKeyWarning) {
} }
} }
// An empty string is rejected as a hash prefix under both length policies.
TEST_F(CollectionValidationTest, HashPrefixesEmptyString) {
    const std::vector<std::string> emptyPrefix{""};
    for (const bool requireEqualLength : {true, false}) {
        ASSERT_THROWS_CODE(CollectionValidation::validateHashes(emptyPrefix, requireEqualLength),
                           DBException,
                           ErrorCodes::InvalidOptions);
    }
}
// A prefix of exactly 64 hex characters is accepted; one character more is rejected under both
// length policies.
TEST_F(CollectionValidationTest, HashPrefixesTooLong) {
    constexpr int kHashStringMaxLen = 64;
    const std::vector<std::string> atLimit{std::string(kHashStringMaxLen, 'A')};
    const std::vector<std::string> overLimit{std::string(kHashStringMaxLen + 1, 'A')};

    ASSERT_DOES_NOT_THROW(CollectionValidation::validateHashes(atLimit, /*equalLength=*/true));
    for (const bool requireEqualLength : {true, false}) {
        ASSERT_THROWS_CODE(CollectionValidation::validateHashes(overLimit, requireEqualLength),
                           DBException,
                           ErrorCodes::InvalidOptions);
    }
}
// Prefixes of differing lengths are only an error when equal lengths are required.
TEST_F(CollectionValidationTest, HashPrefixesDifferentLengths) {
    const std::vector<std::string> mixedLengths{"AAA", "BBBB"};

    ASSERT_DOES_NOT_THROW(
        CollectionValidation::validateHashes(mixedLengths, /*equalLength=*/false));
    ASSERT_THROWS_CODE(CollectionValidation::validateHashes(mixedLengths, /*equalLength=*/true),
                       DBException,
                       ErrorCodes::InvalidOptions);
}
// A prefix containing non-hexadecimal characters is rejected under both length policies.
TEST_F(CollectionValidationTest, HashPrefixesHexString) {
    const std::vector<std::string> nonHex{"NOTHEX"};
    for (const bool requireEqualLength : {true, false}) {
        ASSERT_THROWS_CODE(CollectionValidation::validateHashes(nonHex, requireEqualLength),
                           DBException,
                           ErrorCodes::InvalidOptions);
    }
}
// Exact duplicates, and entries that are leading substrings of other entries, are rejected.
TEST_F(CollectionValidationTest, HashPrefixesDuplicates) {
    const std::vector<std::string> exactDuplicates{"ABC", "ABC"};
    const std::vector<std::string> nestedPrefixes{"ABC", "ABCD", "A"};

    ASSERT_THROWS_CODE(CollectionValidation::validateHashes(exactDuplicates, /*equalLength=*/true),
                       DBException,
                       ErrorCodes::InvalidOptions);
    ASSERT_THROWS_CODE(CollectionValidation::validateHashes(nestedPrefixes, /*equalLength=*/false),
                       DBException,
                       ErrorCodes::InvalidOptions);
}
// Upper- and lower-case hex digits are both accepted, and every validation rule applies
// regardless of the case used.
TEST_F(CollectionValidationTest, HashPrefixesCases) {
    constexpr int kHashStringMaxLen = 64;
    // Mixed-case hex input is valid.
    ASSERT_DOES_NOT_THROW(
        CollectionValidation::validateHashes({"aaa", "BBB", "cCc"}, /*equalLength=*/true));
    // A lower-case prefix of the maximum length is valid.
    ASSERT_DOES_NOT_THROW(CollectionValidation::validateHashes(
        {std::string(kHashStringMaxLen, 'a')}, /*equalLength=*/true));
    // Differing lengths are rejected when equal lengths are required.
    ASSERT_THROWS_CODE(CollectionValidation::validateHashes({"aaa", "BBBB"}, /*equalLength=*/true),
                       DBException,
                       ErrorCodes::InvalidOptions);
    // Lower-case non-hex characters are rejected.
    ASSERT_THROWS_CODE(CollectionValidation::validateHashes({"nothex"}, /*equalLength=*/true),
                       DBException,
                       ErrorCodes::InvalidOptions);
    // Entries that differ only by case count as duplicates.
    ASSERT_THROWS_CODE(CollectionValidation::validateHashes({"aaa", "AAA"}, /*equalLength=*/true),
                       DBException,
                       ErrorCodes::InvalidOptions);
    // Overlap detection ignores case. The lengths differ here, so 'equalLength' must be false;
    // otherwise the call would throw on the length check and never reach the overlap check.
    ASSERT_THROWS_CODE(
        CollectionValidation::validateHashes({"abcd", "a", "ABCDEF"}, /*equalLength=*/false),
        DBException,
        ErrorCodes::InvalidOptions);
}
} // namespace } // namespace
} // namespace mongo } // namespace mongo