SERVER-86326 Increase regex max pattern length to 32764 characters (#44449)

GitOrigin-RevId: 19e9132f7fe3624a918277af071bbcf616801244
This commit is contained in:
Jan 2025-12-03 10:04:07 +01:00 committed by MongoDB Bot
parent 3d7559f185
commit 4bc844ad14
5 changed files with 72 additions and 7 deletions

View File

@ -97,6 +97,8 @@ last-continuous:
ticket: SERVER-109322
- test_file: jstests/sharding/reshard_collection_with_zones_stale_mongos.js
ticket: SERVER-109322
- test_file: jstests/core/query/regex/regex_max_pattern_length.js
ticket: SERVER-86326
suites: null
last-lts:
all:
@ -696,4 +698,6 @@ last-lts:
ticket: SERVER-109322
- test_file: jstests/sharding/reshard_collection_with_zones_stale_mongos.js
ticket: SERVER-109322
- test_file: jstests/core/query/regex/regex_max_pattern_length.js
ticket: SERVER-86326
suites: null

View File

@ -36,15 +36,18 @@ function testRegexAggException(inputObj, exceptionCode, expression) {
}
(function testLongRegex() {
// Exercises testRegexAgg() with patterns at the pattern-length limit shared by all versions in
// multiversion tests, and testRegexAggException() with a pattern that is too long for every
// version.
// NOTE(review): patternMaxLen and patternTooLong are each declared twice in this listing; this
// looks like the removed and added sides of a diff shown together - confirm which declaration
// is live.
// Our limit on regex pattern length is 2^14.
// The maximum allowed pattern length is 16384 before SERVER-86326, and 32764 with it.
// Using a limit of 16384 should work across all versions in multiversion tests.
const kMaxRegexPatternLen = 16384;
const patternMaxLen = "c".repeat(kMaxRegexPatternLen);
// One character shorter than the limit, so that the anchored variant below ("^" + pattern) is
// exactly kMaxRegexPatternLen characters long.
const patternMaxLen = "c".repeat(kMaxRegexPatternLen - 1);
const anchoredPatternMaxLen = "^" + patternMaxLen;
// Test that a regex with maximum allowable pattern length can find a document.
// The "^" anchor is not part of the matched text, so both calls expect the same match.
testRegexAgg({input: "$z", regex: patternMaxLen}, [{match: patternMaxLen, "idx": 0, "captures": []}]);
testRegexAgg({input: "$z", regex: anchoredPatternMaxLen}, [{match: patternMaxLen, "idx": 0, "captures": []}]);
// Test that a regex pattern exceeding the limit fails.
const patternTooLong = patternMaxLen + "c";
// Test that a regex pattern exceeding the limit fails. A pattern length of 32765 is
// disallowed in all versions in multiversion tests.
const patternTooLong = "c".repeat(32765);
// 51111 is the error code expected for the over-long pattern.
testRegexAggException({input: "$z", regex: patternTooLong}, 51111);
})();

View File

@ -0,0 +1,30 @@
/**
* Tests that find() with an anchored $regex pattern of 16384 to 26384 characters succeeds, and
* that a pattern of 32767 characters is rejected with error code 51091.
*
* @tags: [
* assumes_read_concern_local,
* # Non-retryable writes are required for the remove() command in the test.
* requires_non_retryable_writes,
* ]
*/
const kCollName = jsTestName();
// Maximum regex pattern length before SERVER-86326; used as the base length for the patterns
// and documents below.
const kMaxOldRegexPatternLength = 16384;
// Builds an anchored pattern that is exactly `length` characters long: "^" followed by
// `length - 1` copies of "y". Only called with length >= 1 in this test.
const buildAnchoredPattern = (length) => "^" + "y".repeat(length - 1);
const coll = db[kCollName];
coll.drop();
// Assert that the following queries succeed, despite long patterns being used.
// Each iteration leaves exactly one document in the collection, made of
// kMaxOldRegexPatternLength + length "y" characters, and queries it with an anchored pattern of
// the same total length, so exactly one result is expected.
[0, 1, 100, 1000, 10000].forEach((length) => {
coll.remove({});
assert.commandWorked(coll.insert({a: "y".repeat(kMaxOldRegexPatternLength + length)}));
assert.eq(1, coll.find({a: {$regex: buildAnchoredPattern(kMaxOldRegexPatternLength + length)}}).itcount());
});
// Using a too long pattern will always fail.
// 32767 characters is above the 32764-character maximum that applies with SERVER-86326.
let error = assert.throws(() => coll.find({a: {$regex: buildAnchoredPattern(32767)}}).itcount());
assert.commandFailedWithCode(error, 51091);
// The error message must state that the pattern is longer than the configured limit.
assert(
error.message.includes(
"Regular expression is invalid: pattern string is longer than the limit set by the application",
),
);

View File

@ -125,8 +125,9 @@ const std::error_category& pcreCategory() noexcept {
namespace detail {
// Global.
inline constexpr size_t kMaxPatternLength = 16384;
// Maximum length of the pattern accepted by PCRE2, in bytes. The effectively usable pattern length
// may be less than this value.
constexpr size_t kMaxPatternLength = 32764;
/** Wrapper around a pcre2_compile_context. */
class CompileContext {

View File

@ -443,5 +443,32 @@ TEST(PcreTest, HeapLimit) {
}
}
// Verifies that patterns of up to 32764 bytes compile without error, and that a 32765-byte
// pattern is rejected with the "pattern string is longer than the limit set by the application"
// error.
TEST(PcreTest, MaxPatternLength) {
// The subject consists only of 'y', so the all-'x' patterns below never match it.
const std::string s(1024, 'y');
// Test patterns of various lengths
for (uint32_t patternLength : {2, 5, 10, 100, 1000, 10000, 16384, 20000, 30000, 32764}) {
// "^" plus (patternLength - 1) copies of 'x' yields a pattern of exactly patternLength bytes.
std::string pattern = fmt::format("^{}", std::string(patternLength - 1, 'x'));
// We use an anchored pattern here in order to reduce internal memory usage during regex
// execution.
// NOTE(review): despite the variable name, no custom limit is configured here; the regex is
// built with a default-constructed CompileOptions{}.
Regex customLimitRegex(pattern, CompileOptions{});
// Compilation must succeed for every length in the list.
ASSERT_FALSE(customLimitRegex.error());
auto match = customLimitRegex.match(s);
// The match attempt is expected to report an error whose message is "no match".
ASSERT_TRUE(match.error());
ASSERT_EQ(match.error().message(), "no match");
}
// Test a pattern that is above the maximum allowed pattern length.
{
const std::string kTooLongErrorMessage =
"pattern string is longer than the limit set by the application";
// "^" plus 32764 copies of 'x' is 32765 bytes, one more than the largest length tried above.
const std::string pattern = fmt::format("^{}", std::string(32764, 'x'));
Regex defaultLimitRegex(pattern, CompileOptions{});
// Compilation must fail with the "too long" error message.
ASSERT_TRUE(defaultLimitRegex.error());
ASSERT_EQ(defaultLimitRegex.error().message(), kTooLongErrorMessage);
}
}
} // namespace
} // namespace mongo::pcre