Strictly check CRLF when parsing querybuf (#2872)

Currently, when parsing querybuf, we are not checking for CRLF,
instead we assume the last two characters are CRLF by default,
as shown in the following example:
```
telnet 127.0.0.1 6379
Trying 127.0.0.1...
Connected to 127.0.0.1.
Escape character is '^]'.
*3
$3
set
$3
key
$5
value12
+OK
get key
$5
value

*3
$3
set
$3
key
$5
value12345
+OK
-ERR unknown command '345', with args beginning with:
```

This should actually be considered a protocol error. When a bug
occurs in the client-side implementation, we may execute incorrect
requests (writing incorrect data is the most serious of these).

---------

Signed-off-by: Binbin <binloveplay1314@qq.com>
This commit is contained in:
Binbin 2025-12-14 15:10:57 +08:00 committed by GitHub
parent cd6faaa726
commit 51f871ae52
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 79 additions and 1 deletions

View File

@ -3006,6 +3006,9 @@ void handleParseError(client *c) {
} else if (flags & READ_FLAGS_ERROR_UNBALANCED_QUOTES) { } else if (flags & READ_FLAGS_ERROR_UNBALANCED_QUOTES) {
addReplyError(c, "Protocol error: unbalanced quotes in request"); addReplyError(c, "Protocol error: unbalanced quotes in request");
setProtocolError("unbalanced quotes in inline request", c); setProtocolError("unbalanced quotes in inline request", c);
} else if (flags & READ_FLAGS_ERROR_INVALID_CRLF) {
addReplyError(c, "Protocol error: invalid CRLF in request");
setProtocolError("invalid CRLF in request", c);
} else if (flags & READ_FLAGS_ERROR_UNEXPECTED_INLINE_FROM_REPLICATED_CLIENT) { } else if (flags & READ_FLAGS_ERROR_UNEXPECTED_INLINE_FROM_REPLICATED_CLIENT) {
if (getClientType(c) == CLIENT_TYPE_SLOT_IMPORT) { if (getClientType(c) == CLIENT_TYPE_SLOT_IMPORT) {
serverLog(LL_WARNING, "WARNING: Receiving inline protocol from slot import, import stream corruption? Closing the " serverLog(LL_WARNING, "WARNING: Receiving inline protocol from slot import, import stream corruption? Closing the "
@ -3026,7 +3029,8 @@ int isParsingError(client *c) {
READ_FLAGS_ERROR_INVALID_MULTIBULK_LEN | READ_FLAGS_ERROR_UNAUTHENTICATED_MULTIBULK_LEN | READ_FLAGS_ERROR_INVALID_MULTIBULK_LEN | READ_FLAGS_ERROR_UNAUTHENTICATED_MULTIBULK_LEN |
READ_FLAGS_ERROR_UNAUTHENTICATED_BULK_LEN | READ_FLAGS_ERROR_MBULK_INVALID_BULK_LEN | READ_FLAGS_ERROR_UNAUTHENTICATED_BULK_LEN | READ_FLAGS_ERROR_MBULK_INVALID_BULK_LEN |
READ_FLAGS_ERROR_BIG_BULK_COUNT | READ_FLAGS_ERROR_MBULK_UNEXPECTED_CHARACTER | READ_FLAGS_ERROR_BIG_BULK_COUNT | READ_FLAGS_ERROR_MBULK_UNEXPECTED_CHARACTER |
READ_FLAGS_ERROR_UNEXPECTED_INLINE_FROM_REPLICATED_CLIENT | READ_FLAGS_ERROR_UNBALANCED_QUOTES); READ_FLAGS_ERROR_UNEXPECTED_INLINE_FROM_REPLICATED_CLIENT | READ_FLAGS_ERROR_UNBALANCED_QUOTES |
READ_FLAGS_ERROR_INVALID_CRLF);
} }
/* This function is called after the query-buffer was parsed. /* This function is called after the query-buffer was parsed.
@ -3503,6 +3507,11 @@ static int parseMultibulk(client *c,
/* Buffer should also contain \n */ /* Buffer should also contain \n */
if (newline - (c->querybuf + c->qb_pos) > (ssize_t)(sdslen(c->querybuf) - c->qb_pos - 2)) return 0; if (newline - (c->querybuf + c->qb_pos) > (ssize_t)(sdslen(c->querybuf) - c->qb_pos - 2)) return 0;
/* Check that what follows \r is a real \n */
if (unlikely(newline[1] != '\n')) {
return READ_FLAGS_ERROR_INVALID_CRLF;
}
/* We know for sure there is a whole line since newline != NULL, /* We know for sure there is a whole line since newline != NULL,
* so go ahead and find out the multi bulk length. */ * so go ahead and find out the multi bulk length. */
serverAssertWithInfo(c, NULL, c->querybuf[c->qb_pos] == '*'); serverAssertWithInfo(c, NULL, c->querybuf[c->qb_pos] == '*');
@ -3582,6 +3591,11 @@ static int parseMultibulk(client *c,
return READ_FLAGS_ERROR_MBULK_UNEXPECTED_CHARACTER; return READ_FLAGS_ERROR_MBULK_UNEXPECTED_CHARACTER;
} }
/* Check that what follows \r is a real \n */
if (unlikely(newline[1] != '\n')) {
return READ_FLAGS_ERROR_INVALID_CRLF;
}
size_t bulklen_slen = newline - (c->querybuf + c->qb_pos + 1); size_t bulklen_slen = newline - (c->querybuf + c->qb_pos + 1);
ok = string2ll(c->querybuf + c->qb_pos + 1, bulklen_slen, &ll); ok = string2ll(c->querybuf + c->qb_pos + 1, bulklen_slen, &ll);
if (!ok || ll < 0 || (!(is_replicated) && ll > server.proto_max_bulk_len)) { if (!ok || ll < 0 || (!(is_replicated) && ll > server.proto_max_bulk_len)) {
@ -3637,6 +3651,12 @@ static int parseMultibulk(client *c,
*argv = zrealloc(*argv, sizeof(robj *) * (*argv_len)); *argv = zrealloc(*argv, sizeof(robj *) * (*argv_len));
} }
/* Check that what follows argv is a real \r\n */
if (unlikely(c->querybuf[c->qb_pos + c->bulklen] != '\r' ||
c->querybuf[c->qb_pos + c->bulklen + 1] != '\n')) {
return READ_FLAGS_ERROR_INVALID_CRLF;
}
/* Optimization: if a non-replicated client's buffer contains JUST our bulk element /* Optimization: if a non-replicated client's buffer contains JUST our bulk element
* instead of creating a new object by *copying* the sds we * instead of creating a new object by *copying* the sds we
* just use the current sds string. */ * just use the current sds string. */

View File

@ -2777,6 +2777,7 @@ void dictVanillaFree(void *val);
#define READ_FLAGS_NO_KEYS (1 << 19) #define READ_FLAGS_NO_KEYS (1 << 19)
#define READ_FLAGS_CROSSSLOT (1 << 20) #define READ_FLAGS_CROSSSLOT (1 << 20)
#define READ_FLAGS_PREFETCHED (1 << 21) #define READ_FLAGS_PREFETCHED (1 << 21)
#define READ_FLAGS_ERROR_INVALID_CRLF (1 << 22)
/* Write flags for various write errors and states */ /* Write flags for various write errors and states */
#define WRITE_FLAGS_WRITE_ERROR (1 << 0) #define WRITE_FLAGS_WRITE_ERROR (1 << 0)

View File

@ -99,6 +99,62 @@ start_server {tags {"protocol network"}} {
assert_error "*unbalanced*" {r read} assert_error "*unbalanced*" {r read}
} }
test "Check CRLF when parsing the querybuf" {
# Command) SET key value
# RESP) *3\r\n$3\r\nSET\r\n$3\r\nkey\r\n$5\r\nvalue\r\n
# We need to strictly check these \r\n characters.
set proto "*3\r\n\$3\r\nSET\r\n\$3\r\nkey\r\n\$5\r\nvalue\r\n"
reconnect; r write $proto; r flush; assert_equal "OK" [r read]
# Check if multibulklen `*3\r\n` is followed by `\r\n`
set proto1 "*3x\n\$3\r\nSET\r\n\$3\r\nkey\r\n\$5\r\nvalue\r\n"
set proto2 "*3\rx\$3\r\nSET\r\n\$3\r\nkey\r\n\$5\r\nvalue\r\n"
set proto3 "*3xx\$3\r\nSET\r\n\$3\r\nkey\r\n\$5\r\nvalue\r\n"
r write $proto1; r flush; assert_error "*invalid multibulk length*" {r read}; reconnect
r write $proto2; r flush; assert_error "*invalid CRLF in request*" {r read}; reconnect
r write $proto3; r flush; assert_error "*invalid multibulk length*" {r read}; reconnect
# Check if bulklen `$3\r\n` is followed by `\r\n`
set proto1 "*3\r\n\$3x\nSET\r\n\$3\r\nkey\r\n\$5\r\nvalue\r\n"
set proto2 "*3\r\n\$3\rxSET\r\n\$3\r\nkey\r\n\$5\r\nvalue\r\n"
set proto3 "*3\r\n\$3xxSET\r\n\$3\r\nkey\r\n\$5\r\nvalue\r\n"
r write $proto1; r flush; assert_error "*invalid bulk length*" {r read}; reconnect
r write $proto2; r flush; assert_error "*invalid CRLF in request*" {r read}; reconnect
r write $proto3; r flush; assert_error "*invalid bulk length*" {r read}; reconnect
# Check if `SET\r\n` is followed by `\r\n`
set proto1 "*3\r\n\$3\r\nSET\rx\$3\r\nkey\r\n\$5\r\nvalue\r\n"
set proto2 "*3\r\n\$3\r\nSETx\n\$3\r\nkey\r\n\$5\r\nvalue\r\n"
set proto3 "*3\r\n\$3\r\nSETxx\$3\r\nkey\r\n\$5\r\nvalue\r\n"
r write $proto1; r flush; assert_error "*invalid CRLF in request*" {r read}; reconnect
r write $proto2; r flush; assert_error "*invalid CRLF in request*" {r read}; reconnect
r write $proto3; r flush; assert_error "*invalid CRLF in request*" {r read}; reconnect
# Check if `key\r\n` is followed by `\r\n`
set proto1 "*3\r\n\$3\r\nSET\r\n\$3\r\nkeyx\n\$5\r\nvalue\r\n"
set proto2 "*3\r\n\$3\r\nSET\r\n\$3\r\nkey\rx\$5\r\nvalue\r\n"
set proto3 "*3\r\n\$3\r\nSET\r\n\$3\r\nkeyxx\$5\r\nvalue\r\n"
r write $proto1; r flush; assert_error "*invalid CRLF in request*" {r read}; reconnect
r write $proto2; r flush; assert_error "*invalid CRLF in request*" {r read}; reconnect
r write $proto3; r flush; assert_error "*invalid CRLF in request*" {r read}; reconnect
# Check if bulklen `$5\r\n` is followed by `\r\n`
set proto1 "*3\r\n\$3\r\nSET\r\n\$3\r\nkey\r\n\$5x\nvalue\r\n"
set proto2 "*3\r\n\$3\r\nSET\r\n\$3\r\nkey\r\n\$5\rxvalue\r\n"
set proto3 "*3\r\n\$3\r\nSET\r\n\$3\r\nkey\r\n\$5xxvalue\r\n"
r write $proto1; r flush; assert_error "*invalid bulk length*" {r read}; reconnect
r write $proto2; r flush; assert_error "*invalid CRLF in request*" {r read}; reconnect
r write $proto3; r flush; assert_error "*invalid bulk length*" {r read}; reconnect
# Check if `value\r\n` is followed by `\r\n`
set proto1 "*3\r\n\$3\r\nSET\r\n\$3\r\nkey\r\n\$5\r\nvaluex\n"
set proto2 "*3\r\n\$3\r\nSET\r\n\$3\r\nkey\r\n\$5\r\nvalue\rx"
set proto3 "*3\r\n\$3\r\nSET\r\n\$3\r\nkey\r\n\$5\r\nvaluexx"
r write $proto1; r flush; assert_error "*invalid CRLF in request*" {r read}; reconnect
r write $proto2; r flush; assert_error "*invalid CRLF in request*" {r read}; reconnect
r write $proto3; r flush; assert_error "*invalid CRLF in request*" {r read}; reconnect
}
set c 0 set c 0
foreach seq [list "\x00" "*\x00" "$\x00" "*1\r\n$\x00"] { foreach seq [list "\x00" "*\x00" "$\x00" "*1\r\n$\x00"] {
incr c incr c
@ -108,6 +164,7 @@ start_server {tags {"protocol network"}} {
} else { } else {
set s [socket [srv 0 host] [srv 0 port]] set s [socket [srv 0 host] [srv 0 port]]
} }
fconfigure $s -translation binary
puts -nonewline $s $seq puts -nonewline $s $seq
# PROTO_INLINE_MAX_SIZE is hardcoded in Valkey code to 64K. doing the same here # PROTO_INLINE_MAX_SIZE is hardcoded in Valkey code to 64K. doing the same here
# since we would like to validate it is enforced. # since we would like to validate it is enforced.