SERVER-113625: Add logging to special cases in distributed transactions (#44646)

GitOrigin-RevId: e9b5c6ec58564831b202c44904ec26e08538d41d
This commit is contained in:
Ruchitha Rajaghatta 2025-12-08 17:18:23 -05:00 committed by MongoDB Bot
parent 934f9ddb71
commit 204229bf94
2 changed files with 32 additions and 2 deletions

View File

@ -879,14 +879,34 @@ bool TransactionParticipant::Participant::_shouldRestartTransactionOnReuseActive
<< " in state " << txnParticipant.o().txnState,
txnParticipant.transactionIsAbortedWithoutPrepare());
}
LOGV2_DEBUG(
11362500,
3,
"Restarting transaction and reusing active txnNumber because transaction state is "
"None.",
"sessionId"_attr = _sessionId(),
"txnNumber"_attr = o().activeTxnNumberAndRetryCounter.getTxnNumber());
return true;
} else if (o().txnState.isInSet(TransactionState::kAbortedWithoutPrepare)) {
LOGV2_DEBUG(
11362501,
3,
"Restarting transaction and reusing active txnNumber because transaction was aborted "
"and not part of a two phase transaction.",
"sessionId"_attr = _sessionId(),
"txnNumber"_attr = o().activeTxnNumberAndRetryCounter.getTxnNumber());
return true;
} else if (_isInternalSessionForRetryableWrite() &&
o().txnState.isInSet(TransactionState::kCommitted)) {
// We won't actually restart the transaction; we'll return early later on and skip resetting
// any state and metrics.
LOGV2_DEBUG(
11362502,
3,
"Restarting transaction and reusing active txnNumber because transaction participant "
"is in retryable write mode and TransactionState is Committed.",
"sessionId"_attr = _sessionId(),
"txnNumber"_attr = o().activeTxnNumberAndRetryCounter.getTxnNumber());
return true;
} else {
uassert(

View File

@ -1050,7 +1050,12 @@ std::vector<ShardId> TransactionRouter::Router::_getPendingParticipants() const
void TransactionRouter::Router::_clearPendingParticipants(OperationContext* opCtx,
boost::optional<Status> optStatus) {
const auto pendingParticipants = _getPendingParticipants();
LOGV2_DEBUG(11362503,
3,
"Clearing pending participants",
"pendingParticipantList"_attr = pendingParticipants,
"sessionId"_attr = _sessionId(),
"txnNumber"_attr = o().txnNumberAndRetryCounter.getTxnNumber());
// If there was a stale shard or db routing error and the transaction is retryable then we don't
// send abort to any participant to prevent a race between the aborts and the commands retried
if (!o().subRouter && (!optStatus || !_errorAllowsRetryOnStaleShardOrDb(*optStatus))) {
@ -1313,6 +1318,11 @@ void TransactionRouter::Router::_continueTxn(OperationContext* opCtx,
// is a retry coming from the parent router where the parent router picked a new
// clusterTime for the transaction.
if (o().participants.empty()) {
LOGV2_DEBUG(11362504,
3,
"Transaction was retried at the router level",
"sessionId"_attr = _sessionId(),
"txnNumber"_attr = o().txnNumberAndRetryCounter.getTxnNumber());
invariant(opCtx->isActiveTransactionParticipant());
tassert(8980602,
"Transaction sub-router tried to continue a transaction without any "