NOTE(review): this patch was re-flowed from a whitespace-collapsed copy. Indentation, blank
context lines and the C++ template arguments (std::vector<std::string>, boost::optional<...>) are
reconstructed -- confirm them against the target tree before applying. The "index" blob hashes are
kept from the original patch and no longer match the files whose added lines were changed in review.

diff --git a/jstests/noPassthrough/validate/drill_down_and_identify_doc.js b/jstests/noPassthrough/validate/drill_down_and_identify_doc.js
index d1013e8c683..813b9327a1e 100644
--- a/jstests/noPassthrough/validate/drill_down_and_identify_doc.js
+++ b/jstests/noPassthrough/validate/drill_down_and_identify_doc.js
@@ -26,12 +26,12 @@ assert.commandWorked(db2.coll.update({_id: differingDocId}, {$set: {a: "hello"}}
 function hashDrillDown(db1, db2) {
-    // We start off by providing an empty string prefix, and from there we drill down
+    // We start off by providing an empty list of prefixes, and from there we drill down
     // into each bucket.
-    let prefix = "";
+    let prefix = [];
     let count = 2; // Start off with any value > 1.
     while (count > 1) {
-        jsTest.log.info(`Drilling down with prefix: '${prefix}'`);
-        let partial1 = assert.commandWorked(db1.coll.validate({collHash: true, hashPrefixes: [prefix]})).partial;
-        let partial2 = assert.commandWorked(db2.coll.validate({collHash: true, hashPrefixes: [prefix]})).partial;
+        jsTest.log.info(`Drilling down with prefix: [${prefix}]`);
+        let partial1 = assert.commandWorked(db1.coll.validate({collHash: true, hashPrefixes: prefix})).partial;
+        let partial2 = assert.commandWorked(db2.coll.validate({collHash: true, hashPrefixes: prefix})).partial;
         jsTest.log.info("Partial1: " + tojson(partial1));
         jsTest.log.info("Partial2: " + tojson(partial2));
         assert.eq(Object.keys(partial1).length, Object.keys(partial2).length);
@@ -42,16 +42,16 @@ function hashDrillDown(db1, db2) {
             }
         }
         assert.eq(differingBuckets.length, 1, tojson(differingBuckets));
-        prefix = differingBuckets[0];
+        prefix = differingBuckets;
         count = partial1[differingBuckets[0]].count;
     }
 
-    jsTest.log.info("Prefix of differing bucket: " + tojson(prefix));
+    jsTest.log.info("Prefix of differing bucket: " + prefix);
 
     // revealHashedIds with 'prefix' should return 'differingDocId'.
-    const res = assert.commandWorked(db1.coll.validate({collHash: true, revealHashedIds: [prefix]}));
-    assert.eq(res.revealedIds[prefix].length, 1, res);
-    assert.eq(res.revealedIds[prefix][0], {_id: differingDocId}, res);
+    const res = assert.commandWorked(db1.coll.validate({collHash: true, revealHashedIds: prefix}));
+    assert.eq(res.revealedIds[prefix[0]].length, 1, res);
+    assert.eq(res.revealedIds[prefix[0]][0], {_id: differingDocId}, res);
 }
 
 hashDrillDown(db1, db2);
diff --git a/jstests/noPassthrough/validate/unhash_basic.js b/jstests/noPassthrough/validate/revealHashedIds_basic.js
similarity index 100%
rename from jstests/noPassthrough/validate/unhash_basic.js
rename to jstests/noPassthrough/validate/revealHashedIds_basic.js
diff --git a/jstests/noPassthroughWithMongod/validate/validate_command_extended.js b/jstests/noPassthroughWithMongod/validate/validate_command_extended.js
index b1d6f18674c..3767d243e99 100644
--- a/jstests/noPassthroughWithMongod/validate/validate_command_extended.js
+++ b/jstests/noPassthroughWithMongod/validate/validate_command_extended.js
@@ -49,11 +49,31 @@ testValidate({collHash: true, hashPrefixes: []});
 
 assert.commandFailed(t.validate({hashPrefixes: ["aaa"]}));
 
+assert.commandFailed(t.validate({collHash: true, hashPrefixes: [""]}));
+
+assert.commandFailed(t.validate({collHash: true, hashPrefixes: ["x".repeat(100)]}));
+
+assert.commandFailed(t.validate({collHash: true, hashPrefixes: ["random string"]}));
+
+assert.commandFailed(t.validate({collHash: true, hashPrefixes: ["aaa", "bb"]}));
+
+assert.commandFailed(t.validate({collHash: true, hashPrefixes: ["aaa", "bbb", "aaa"]}));
+
 // Test revealHashedIds
 testValidate({collHash: true, revealHashedIds: ["aaa"]});
 
+testValidate({collHash: true, revealHashedIds: ["aaa", "bb"]});
+
 assert.commandFailed(t.validate({collHash: true, revealHashedIds: []}));
 
 assert.commandFailed(t.validate({revealHashedIds: ["aaa"]}));
 
 assert.commandFailed(t.validate({collHash: true, revealHashedIds: ["aaa"], hashPrefixes: ["aaa"]}));
+
+assert.commandFailed(t.validate({collHash: true, revealHashedIds: [""]}));
+
+assert.commandFailed(t.validate({collHash: true, revealHashedIds: ["x".repeat(100)]}));
+
+assert.commandFailed(t.validate({collHash: true, revealHashedIds: ["random string"]}));
+
+assert.commandFailed(t.validate({collHash: true, revealHashedIds: ["aaa", "a", "aaa"]}));
diff --git a/src/mongo/db/commands/validate.cpp b/src/mongo/db/commands/validate.cpp
index e46ba921e53..c036109b4b9 100644
--- a/src/mongo/db/commands/validate.cpp
+++ b/src/mongo/db/commands/validate.cpp
@@ -374,7 +374,6 @@ public:
         // collHash parameter.
         const bool collHash = cmdObj["collHash"].trueValue();
 
-        // TODO (SERVER-110841): Sanitize the parameters passed as hashPrefixes.
         // hashPrefixes parameter.
         const auto rawHashPrefixes = cmdObj["hashPrefixes"];
         boost::optional<std::vector<std::string>> hashPrefixes = boost::none;
@@ -383,6 +382,7 @@ public:
             for (const auto& e : rawHashPrefixes.Array()) {
                 hashPrefixes->push_back(e.String());
             }
+            CollectionValidation::validateHashes(*hashPrefixes, /*equalLength=*/true);
             if (!hashPrefixes->size()) {
                 hashPrefixes->push_back(std::string(""));
             }
@@ -399,7 +399,6 @@ public:
                                     << " requires {collHash: true}.");
         }
 
-        // TODO (SERVER-110841): Sanitize prefixes in the revealHashedIds field.
         // revealHashedIds parameter.
         const auto rawRevealHashedIds = cmdObj["revealHashedIds"];
         boost::optional<std::vector<std::string>> revealHashedIds = boost::none;
@@ -433,6 +432,7 @@ public:
             for (const auto& e : rawRevealHashedIdsArr) {
                 revealHashedIds->push_back(e.String());
             }
+            CollectionValidation::validateHashes(*revealHashedIds, /*equalLength=*/false);
         }
 
         auto validateMode = [&] {
diff --git a/src/mongo/db/validate/collection_validation.cpp b/src/mongo/db/validate/collection_validation.cpp
index 8ef7737a2b5..092ddef5506 100644
--- a/src/mongo/db/validate/collection_validation.cpp
+++ b/src/mongo/db/validate/collection_validation.cpp
@@ -502,6 +502,56 @@ void _validateCatalogEntry(OperationContext* opCtx,
 
 }  // namespace
 
+void validateHashes(const std::vector<std::string>& hashPrefixes, bool equalLength) {
+    if (hashPrefixes.empty()) {
+        return;
+    }
+
+    const size_t kHashStringMaxLen = SHA256Block().toHexString().size();
+    const auto hashPrefixLength = hashPrefixes[0].length();
+    // Lowercased copies of the inputs, so that the overlap check below ignores case.
+    std::vector<std::string> normalizedHashPrefixes;
+    for (const auto& hashPrefix : hashPrefixes) {
+        uassert(ErrorCodes::InvalidOptions,
+                "Hash prefixes should not be empty strings.",
+                !hashPrefix.empty());
+
+        uassert(ErrorCodes::InvalidOptions,
+                fmt::format("Hash prefixes too long. Received: {}", hashPrefix),
+                hashPrefix.length() <= kHashStringMaxLen);
+
+        uassert(ErrorCodes::InvalidOptions,
+                "Hash prefixes should not have different lengths.",
+                !equalLength || hashPrefix.length() == hashPrefixLength);
+
+        std::string normalizedHashPrefix;
+        for (char c : hashPrefix) {
+            uassert(ErrorCodes::InvalidOptions,
+                    fmt::format("Hash prefixes should only contain hex strings. Received: {}.",
+                                hashPrefix),
+                    ctype::isXdigit(c));
+            normalizedHashPrefix.push_back(ctype::toLower(c));
+        }
+        normalizedHashPrefixes.push_back(std::move(normalizedHashPrefix));
+    }
+
+    // After sorting, a string that is a prefix of (or equal to) any other entry is also a prefix
+    // of its immediate successor, so checking adjacent pairs detects every overlap.
+    std::sort(normalizedHashPrefixes.begin(), normalizedHashPrefixes.end());
+    for (size_t i = 0; i < normalizedHashPrefixes.size() - 1; ++i) {
+        const auto& currentHashPrefix = normalizedHashPrefixes[i];
+        const auto& nextHashPrefix = normalizedHashPrefixes[i + 1];
+
+        if (currentHashPrefix.length() <= nextHashPrefix.length()) {
+            uassert(ErrorCodes::InvalidOptions,
+                    fmt::format("Provided hash prefixes should not duplicate: {}, {}",
+                                currentHashPrefix,
+                                nextHashPrefix),
+                    !nextHashPrefix.starts_with(currentHashPrefix));
+        }
+    }
+}
+
 Status validate(OperationContext* opCtx,
                 const NamespaceString& nss,
                 ValidationOptions options,
diff --git a/src/mongo/db/validate/collection_validation.h b/src/mongo/db/validate/collection_validation.h
index 10f997de5a2..299be5d92bc 100644
--- a/src/mongo/db/validate/collection_validation.h
+++ b/src/mongo/db/validate/collection_validation.h
@@ -44,6 +44,14 @@ class ValidateResults;
 
 namespace CollectionValidation {
 
+/**
+ * Validates the hash strings in 'hashPrefixes'. Throws InvalidOptions if any string is empty,
+ * longer than a SHA256 hex digest, contains a non-hex character, or is (ignoring case) equal to
+ * or a prefix of another string. When 'equalLength' is true, also requires all strings to have
+ * the same length. An empty 'hashPrefixes' is accepted.
+ */
+void validateHashes(const std::vector<std::string>& hashPrefixes, bool equalLength);
+
 /**
  * Expects the caller to hold no locks.
  *
diff --git a/src/mongo/db/validate/collection_validation_test.cpp b/src/mongo/db/validate/collection_validation_test.cpp
index 5f952aea098..de04f4dd402 100644
--- a/src/mongo/db/validate/collection_validation_test.cpp
+++ b/src/mongo/db/validate/collection_validation_test.cpp
@@ -431,5 +431,80 @@ TEST_F(CollectionValidationTest, ValidateOldUniqueIndexKeyWarning) {
     }
 }
 
+TEST_F(CollectionValidationTest, HashPrefixesEmptyString) {
+    ASSERT_THROWS_CODE(CollectionValidation::validateHashes({""}, /*equalLength=*/true),
+                       DBException,
+                       ErrorCodes::InvalidOptions);
+    ASSERT_THROWS_CODE(CollectionValidation::validateHashes({""}, /*equalLength=*/false),
+                       DBException,
+                       ErrorCodes::InvalidOptions);
+}
+
+TEST_F(CollectionValidationTest, HashPrefixesTooLong) {
+    constexpr int kHashStringMaxLen = 64;
+    ASSERT_DOES_NOT_THROW(CollectionValidation::validateHashes(
+        {std::string(kHashStringMaxLen, 'A')}, /*equalLength=*/true));
+
+    ASSERT_THROWS_CODE(CollectionValidation::validateHashes(
+                           {std::string(kHashStringMaxLen + 1, 'A')}, /*equalLength=*/true),
+                       DBException,
+                       ErrorCodes::InvalidOptions);
+    ASSERT_THROWS_CODE(CollectionValidation::validateHashes(
+                           {std::string(kHashStringMaxLen + 1, 'A')}, /*equalLength=*/false),
+                       DBException,
+                       ErrorCodes::InvalidOptions);
+}
+
+TEST_F(CollectionValidationTest, HashPrefixesDifferentLengths) {
+    ASSERT_DOES_NOT_THROW(
+        CollectionValidation::validateHashes({"AAA", "BBBB"}, /*equalLength=*/false));
+
+    ASSERT_THROWS_CODE(CollectionValidation::validateHashes({"AAA", "BBBB"}, /*equalLength=*/true),
+                       DBException,
+                       ErrorCodes::InvalidOptions);
+}
+
+TEST_F(CollectionValidationTest, HashPrefixesHexString) {
+    ASSERT_THROWS_CODE(CollectionValidation::validateHashes({"NOTHEX"}, /*equalLength=*/true),
+                       DBException,
+                       ErrorCodes::InvalidOptions);
+    ASSERT_THROWS_CODE(CollectionValidation::validateHashes({"NOTHEX"}, /*equalLength=*/false),
+                       DBException,
+                       ErrorCodes::InvalidOptions);
+}
+
+TEST_F(CollectionValidationTest, HashPrefixesDuplicates) {
+    ASSERT_THROWS_CODE(CollectionValidation::validateHashes({"ABC", "ABC"}, /*equalLength=*/true),
+                       DBException,
+                       ErrorCodes::InvalidOptions);
+    ASSERT_THROWS_CODE(
+        CollectionValidation::validateHashes({"ABC", "ABCD", "A"}, /*equalLength=*/false),
+        DBException,
+        ErrorCodes::InvalidOptions);
+}
+
+TEST_F(CollectionValidationTest, HashPrefixesCases) {
+    constexpr int kHashStringMaxLen = 64;
+    ASSERT_DOES_NOT_THROW(
+        CollectionValidation::validateHashes({"aaa", "BBB", "cCc"}, /*equalLength=*/true));
+    ASSERT_DOES_NOT_THROW(CollectionValidation::validateHashes(
+        {std::string(kHashStringMaxLen, 'a')}, /*equalLength=*/true));
+
+    ASSERT_THROWS_CODE(CollectionValidation::validateHashes({"aaa", "BBBB"}, /*equalLength=*/true),
+                       DBException,
+                       ErrorCodes::InvalidOptions);
+    ASSERT_THROWS_CODE(CollectionValidation::validateHashes({"nothex"}, /*equalLength=*/true),
+                       DBException,
+                       ErrorCodes::InvalidOptions);
+    ASSERT_THROWS_CODE(CollectionValidation::validateHashes({"aaa", "AAA"}, /*equalLength=*/true),
+                       DBException,
+                       ErrorCodes::InvalidOptions);
+    // equalLength is false here so the length check cannot mask the overlap check.
+    ASSERT_THROWS_CODE(
+        CollectionValidation::validateHashes({"abcd", "a", "ABCDEF"}, /*equalLength=*/false),
+        DBException,
+        ErrorCodes::InvalidOptions);
+}
+
 }  // namespace
 }  // namespace mongo