Additional log information for cluster accept handler and message processing (#2815)

Enhance debugging for cluster logs

[1] Add human node names in cluster tests so that we can easily
understand which nodes we are interacting with:

```
pong packet received from: 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) from client: :0
node 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) announces that it is a primary in shard c6d1152caee49a5e70cb4b77d1549386078be603
Reconfiguring node 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) as primary for shard c6d1152caee49a5e70cb4b77d1549386078be603
Configuration change detected. Reconfiguring myself as a replica of node 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) in shard c6d1152caee49a5e70cb4b77d1549386078be603
```



[2] Currently there are logs showing the address of incoming
connections:

```
Accepting cluster node connection from 127.0.0.1:59956
Accepting cluster node connection from 127.0.0.1:59957
Accepting cluster node connection from 127.0.0.1:59958
Accepting cluster node connection from 127.0.0.1:59959
```

but we have no idea which nodes these connections refer to. I added a
logging statement when the node is set to the inbound link connection.

```
Bound cluster node 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) to connection of client 127.0.0.1:59956
```



[3] Add a debug log when processing a packet to show the packet type,
sender node name, and sender client port (this also has the benefit of
telling us whether this is an inbound or outbound link).

```
pong packet received from: 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) from client: :0
ping packet received from: 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) from client: 127.0.0.1:59973
fail packet received from: 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) from client: 127.0.0.1:59973
auth-req packet received from: 92a960ffd62f1bd04efeb260b30fe9ca6b9294ed (R4) from client: 127.0.0.1:59973
```

---------

Signed-off-by: Zhijun <dszhijun@gmail.com>
This commit is contained in:
Zhijun Liao 2025-11-26 09:07:21 +08:00 committed by GitHub
parent e5de417f1e
commit da3c43d76b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 32 additions and 8 deletions

View File

@ -1708,6 +1708,17 @@ void setClusterNodeToInboundClusterLink(clusterNode *node, clusterLink *link) {
serverAssert(!node->inbound_link);
node->inbound_link = link;
link->node = node;
if (server.verbosity <= LL_VERBOSE) {
char ip[NET_IP_STR_LEN];
int port;
if (connAddrPeerName(link->conn, ip, sizeof(ip), &port) != -1) {
serverLog(LL_VERBOSE, "Bound cluster node %.40s (%s) to connection of client %s:%d",
node->name, node->human_nodename, ip, port);
} else {
serverLog(LL_VERBOSE, "Error resolving the inbound connection address of node %.40s (%s)",
node->name, node->human_nodename);
}
}
}
static void clusterConnAcceptHandler(connection *conn) {
@ -3833,11 +3844,21 @@ int clusterProcessPacket(clusterLink *link) {
clusterSendPing(link, CLUSTERMSG_TYPE_PONG);
}
if (server.verbosity <= LL_DEBUG) {
char ip[NET_IP_STR_LEN];
int port;
if (connAddrPeerName(link->conn, ip, sizeof(ip), &port) != -1) {
serverLog(LL_DEBUG, "%s packet received from: %.40s (%s) from client: %s:%d",
clusterGetMessageTypeString(type),
clusterLinkGetNodeName(link), clusterLinkGetHumanNodeName(link),
ip, port);
} else {
serverLog(LL_DEBUG, "Error resolving the address of packet sender %.40s (%s)",
clusterLinkGetNodeName(link), clusterLinkGetHumanNodeName(link));
}
}
/* PING, PONG, MEET: process config information. */
if (type == CLUSTERMSG_TYPE_PING || type == CLUSTERMSG_TYPE_PONG || type == CLUSTERMSG_TYPE_MEET) {
serverLog(LL_DEBUG, "%s packet received: %.40s", clusterGetMessageTypeString(type),
clusterLinkGetNodeName(link));
if (sender && nodeInMeetState(sender)) {
/* Once we get a response for MEET from the sender, we can stop sending more MEET. */
sender->flags &= ~CLUSTER_NODE_MEET;

View File

@ -214,6 +214,9 @@ proc cluster_setup {masters replicas node_count slot_allocator replica_allocator
for {set i 0} {$i < $node_count} {incr i} {
R $i CLUSTER SET-CONFIG-EPOCH $config_epoch
incr config_epoch
# Make it easier to understand how the server interacts with
# other nodes when reading the server logs.
R $i CONFIG SET cluster-announce-human-nodename "R$i"
}
# Have all nodes meet

View File

@ -478,21 +478,21 @@ start_cluster 3 1 {tags {external:skip cluster}} {
# verify we print the logs.
# Both importing slots and migrating slots are move to R3.
set pattern "*Failover occurred in migration source. Update importing source for slot 0 to node $R3_nodeid () in shard $R3_shardid*"
set pattern "*Failover occurred in migration source. Update importing source for slot 0 to node $R3_nodeid * in shard $R3_shardid*"
verify_log_message -1 $pattern $loglines1
set pattern "*Failover occurred in migration target. Slot 5462 is now being migrated to node $R3_nodeid () in shard $R3_shardid*"
set pattern "*Failover occurred in migration target. Slot 5462 is now being migrated to node $R3_nodeid * in shard $R3_shardid*"
verify_log_message -1 $pattern $loglines1
# Both slots are move to R3.
set R0_slots 5462
set pattern "*A failover occurred in shard $R3_shardid; node $R0_nodeid () lost $R0_slots slot(s) and failed over to node $R3_nodeid*"
set pattern "*A failover occurred in shard $R3_shardid; node $R0_nodeid * lost $R0_slots slot(s) and failed over to node $R3_nodeid*"
verify_log_message -1 $pattern $loglines1
verify_log_message -2 $pattern $loglines2
# Both importing slots and migrating slots are move to R3.
set pattern "*A failover occurred in migration source. Update importing source of 1 slot(s) to node $R3_nodeid () in shard $R3_shardid*"
set pattern "*A failover occurred in migration source. Update importing source of 1 slot(s) to node $R3_nodeid * in shard $R3_shardid*"
verify_log_message -1 $pattern $loglines1
set pattern "*A failover occurred in migration target. Update migrating target of 1 slot(s) to node $R3_nodeid () in shard $R3_shardid*"
set pattern "*A failover occurred in migration target. Update migrating target of 1 slot(s) to node $R3_nodeid * in shard $R3_shardid*"
verify_log_message -1 $pattern $loglines1
R 1 debug disable-cluster-reconnection 0