mirror of https://github.com/mongodb/mongo
139 lines
5.4 KiB
JavaScript
139 lines
5.4 KiB
JavaScript
/**
|
|
* Exercises two-phase commit failures.
|
|
*
|
|
* @tags: [uses_transactions, uses_multi_shard_transaction, multiversion_incompatible]
|
|
*/
|
|
|
|
import {ShardingTest} from "jstests/libs/shardingtest.js";
|
|
import {extractUUIDFromObject} from "jstests/libs/uuid_util.js";
|
|
import {
|
|
flushRoutersAndRefreshShardMetadata,
|
|
getCoordinatorFailpoints,
|
|
} from "jstests/sharding/libs/sharded_transactions_helpers.js";
|
|
import {runCommitThroughMongos} from "jstests/sharding/libs/txn_two_phase_commit_util.js";
|
|
|
|
const dbName = "test";
|
|
const collName = "foo";
|
|
const ns = dbName + "." + collName;
|
|
|
|
// Lower the transaction timeout for participants, since this test exercises the case where the
|
|
// coordinator hangs before writing the participant list and then checks that the transaction is
|
|
// eventually aborted on the coordinator, and the coordinator will only abort on reaching the
|
|
// transaction timeout.
|
|
TestData.transactionLifetimeLimitSeconds = 30;
|
|
|
|
let lsid = {id: UUID()};
|
|
let txnNumber = 0;
|
|
|
|
(function () {
|
|
let st = new ShardingTest({
|
|
shards: 3,
|
|
rs0: {nodes: [{}]},
|
|
causallyConsistent: true,
|
|
other: {mongosOptions: {verbose: 3}},
|
|
});
|
|
|
|
let coordinatorReplSetTest = st.rs0;
|
|
|
|
let participant0 = st.shard0;
|
|
let participant1 = st.shard1;
|
|
let participant2 = st.shard2;
|
|
|
|
const setUp = function () {
|
|
// Create a sharded collection with a chunk on each shard:
|
|
// shard0: [-inf, 0)
|
|
// shard1: [0, 10)
|
|
// shard2: [10, +inf)
|
|
assert.commandWorked(st.s.adminCommand({enableSharding: dbName, primaryShard: participant0.shardName}));
|
|
assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {_id: 1}}));
|
|
assert.commandWorked(st.s.adminCommand({split: ns, middle: {_id: 0}}));
|
|
assert.commandWorked(st.s.adminCommand({split: ns, middle: {_id: 10}}));
|
|
assert.commandWorked(st.s.adminCommand({moveChunk: ns, find: {_id: 0}, to: participant1.shardName}));
|
|
assert.commandWorked(st.s.adminCommand({moveChunk: ns, find: {_id: 10}, to: participant2.shardName}));
|
|
|
|
flushRoutersAndRefreshShardMetadata(st, {ns});
|
|
|
|
// Start a new transaction by inserting a document onto each shard.
|
|
assert.commandWorked(
|
|
st.s.getDB(dbName).runCommand({
|
|
insert: collName,
|
|
documents: [{_id: -5}, {_id: 5}, {_id: 15}],
|
|
lsid: lsid,
|
|
txnNumber: NumberLong(txnNumber),
|
|
stmtId: NumberInt(0),
|
|
startTransaction: true,
|
|
autocommit: false,
|
|
}),
|
|
);
|
|
};
|
|
|
|
const testCommitProtocol = function (failpointData, expectError = ErrorCodes.NoSuchTransaction) {
|
|
jsTest.log("Testing commit protocol with failpointData: " + tojson(failpointData));
|
|
|
|
txnNumber++;
|
|
setUp();
|
|
|
|
let coordPrimary = coordinatorReplSetTest.getPrimary();
|
|
|
|
assert.commandWorked(
|
|
coordPrimary.adminCommand({
|
|
configureFailPoint: failpointData.failpoint,
|
|
mode: {skip: failpointData.skip ? failpointData.skip : 0},
|
|
data: failpointData.options ? failpointData.options : {},
|
|
}),
|
|
);
|
|
|
|
runCommitThroughMongos(extractUUIDFromObject(lsid.id), txnNumber, st.s.host, expectError);
|
|
|
|
// Check that the transaction aborted as expected.
|
|
jsTest.log("Verify that the transaction was aborted on all shards.");
|
|
assert.eq(0, st.s.getDB(dbName).getCollection(collName).find().itcount());
|
|
|
|
st.s.getDB(dbName).getCollection(collName).drop();
|
|
clearRawMongoProgramOutput();
|
|
|
|
assert.commandWorked(coordPrimary.adminCommand({configureFailPoint: failpointData.failpoint, mode: "off"}));
|
|
};
|
|
|
|
//
|
|
// Run through all the failpoints. Each failpoint is targeting different error handling block.
|
|
//
|
|
|
|
// This triggers timeout in the chain registered in TransactionCoordinator constructor, which throws
|
|
// TransactionCoordinatorReachedAbortDecision caught by the onError() in the same chain.
|
|
testCommitProtocol(
|
|
getCoordinatorFailpoints().find((data) => data.failpoint === "hangBeforeWritingParticipantList"),
|
|
);
|
|
|
|
// This is one of the standard error codes that a transaction shard can generate and is supported by
|
|
// the transaction coordinator directly.
|
|
testCommitProtocol({
|
|
failpoint: "failRemoteTransactionCommand",
|
|
numTimesShouldBeHit: 2,
|
|
options: {command: "prepareTransaction", code: ErrorCodes.NoSuchTransaction},
|
|
});
|
|
|
|
// Similar to above, except with different error code, which is expected to be propagated to the
|
|
// abort response.
|
|
testCommitProtocol(
|
|
{
|
|
failpoint: "failRemoteTransactionCommand",
|
|
numTimesShouldBeHit: 2,
|
|
options: {command: "prepareTransaction", code: ErrorCodes.TransactionTooOld},
|
|
},
|
|
ErrorCodes.TransactionTooOld,
|
|
);
|
|
|
|
// This is one of the non standard error codes from a transaction shard, it is retried by the
|
|
// per-shard retry logic and it is eventually converted into
|
|
// TransactionCoordinatorReachedAbortDecision and is caught by the onError block in the
|
|
// sendPrepareToShard() helper method.
|
|
testCommitProtocol({
|
|
failpoint: "failRemoteTransactionCommand",
|
|
numTimesShouldBeHit: 2,
|
|
options: {command: "prepareTransaction", code: ErrorCodes.CommandNotFound},
|
|
});
|
|
|
|
st.stop();
|
|
})();
|