SERVER-113625: Add logging to special cases in distributed transactions (#44646)

GitOrigin-RevId: e9b5c6ec58564831b202c44904ec26e08538d41d
2025-12-08 17:18:23 -05:00 · 2025-12-08 17:18:23 -05:00 · 204229bf94
parent 934f9ddb71
commit 204229bf94
2 changed files with 32 additions and 2 deletions
--- a/src/mongo/db/transaction/transaction_participant.cpp
+++ b/src/mongo/db/transaction/transaction_participant.cpp
@ -879,14 +879,34 @@ bool TransactionParticipant::Participant::_shouldRestartTransactionOnReuseActive
                              << " in state " << txnParticipant.o().txnState,
                txnParticipant.transactionIsAbortedWithoutPrepare());
        }
-
+        LOGV2_DEBUG(
+            11362500,
+            3,
+            "Restarting transaction and reusing active txnNumber because transaction state is "
+            "None.",
+            "sessionId"_attr = _sessionId(),
+            "txnNumber"_attr = o().activeTxnNumberAndRetryCounter.getTxnNumber());
        return true;
    } else if (o().txnState.isInSet(TransactionState::kAbortedWithoutPrepare)) {
+        LOGV2_DEBUG(
+            11362501,
+            3,
+            "Restarting transaction and reusing active txnNumber because transaction was aborted "
+            "and not part of a two phase transaction.",
+            "sessionId"_attr = _sessionId(),
+            "txnNumber"_attr = o().activeTxnNumberAndRetryCounter.getTxnNumber());
        return true;
    } else if (_isInternalSessionForRetryableWrite() &&
               o().txnState.isInSet(TransactionState::kCommitted)) {
        // We won't actually restart the transaction, we'll early return later on and skip resetting
        // any state and metrics
+        LOGV2_DEBUG(
+            11362502,
+            3,
+            "Restarting transaction and reusing active txnNumber because transaction participant "
+            "is in retryable write mode and TransactionState is Committed.",
+            "sessionId"_attr = _sessionId(),
+            "txnNumber"_attr = o().activeTxnNumberAndRetryCounter.getTxnNumber());
        return true;
    } else {
        uassert(
--- a/src/mongo/s/transaction_router.cpp
+++ b/src/mongo/s/transaction_router.cpp
@ -1050,7 +1050,12 @@ std::vector<ShardId> TransactionRouter::Router::_getPendingParticipants() const
 void TransactionRouter::Router::_clearPendingParticipants(OperationContext* opCtx,
                                                          boost::optional<Status> optStatus) {
    const auto pendingParticipants = _getPendingParticipants();
-
+    LOGV2_DEBUG(11362503,
+                3,
+                "Clearing pending participants",
+                "pendingParticipantList"_attr = pendingParticipants,
+                "sessionId"_attr = _sessionId(),
+                "txnNumber"_attr = o().txnNumberAndRetryCounter.getTxnNumber());
    // If there was a stale shard or db routing error and the transaction is retryable then we don't
    // send abort to any participant to prevent a race between the aborts and the commands retried
    if (!o().subRouter && (!optStatus || !_errorAllowsRetryOnStaleShardOrDb(*optStatus))) {
@ -1313,6 +1318,11 @@ void TransactionRouter::Router::_continueTxn(OperationContext* opCtx,
            // is a retry coming from the parent router where the parent router picked a new
            // clusterTime for the transaction.
            if (o().participants.empty()) {
+                LOGV2_DEBUG(11362504,
+                            3,
+                            "Transaction was retried at the router level",
+                            "sessionId"_attr = _sessionId(),
+                            "txnNumber"_attr = o().txnNumberAndRetryCounter.getTxnNumber());
                invariant(opCtx->isActiveTransactionParticipant());
                tassert(8980602,
                        "Transaction sub-router tried to continue a transaction without any "