mirror of https://github.com/mongodb/mongo
149 lines
4.7 KiB
JavaScript
149 lines
4.7 KiB
JavaScript
/**
 * Tests that the resharding coordinator correctly handles critical section timeout and abort
 * while waiting for responses for commands against donors or recipients in the critical section.
 *
 * @tags: [
 *   requires_fcv_83,
 *   featureFlagReshardingVerification,
 *   featureFlagReshardingSkipCloningAndApplyingIfApplicable,
 * ]
 */
|
|
import {configureFailPoint} from "jstests/libs/fail_point_util.js";
|
|
import {Thread} from "jstests/libs/parallelTester.js";
|
|
import {ShardingTest} from "jstests/libs/shardingtest.js";
|
|
|
|
/**
 * Runs a moveCollection command through its own connection to a mongos.
 *
 * The function only uses its arguments, which lets it be handed to a parallel Thread as the
 * thread body.
 *
 * @param {string} mongosHost - Host string of the mongos to connect to.
 * @param {string} ns - Namespace ("db.collection") of the collection to move.
 * @param {string} toShard - Name of the destination shard.
 * @returns {Object} The raw response of the moveCollection admin command (not asserted here).
 */
function runMoveCollection(mongosHost, ns, toShard) {
    const conn = new Mongo(mongosHost);
    const moveCmd = {moveCollection: ns, toShard: toShard};
    return conn.adminCommand(moveCmd);
}
|
|
|
|
// Two-shard cluster used by every test case in this file.
const st = new ShardingTest({
    shards: 2,
    other: {
        configOptions: {
            setParameter: {
                // Use a generous remaining-time threshold so that every resharding operation
                // in this test can enter its critical section promptly, even on slow build
                // variants.
                remainingReshardingOperationTimeThresholdMillis: 5000,
            },
        },
    },
});

// Direct connections to the nodes that get fail points / server parameters configured on them.
const shard0Primary = st.rs0.getPrimary();
const configPrimary = st.configRS.getPrimary();

// Database holding the test collections; shard0 is its primary shard.
const dbName = "testDb";
const testDb = st.s.getDB(dbName);
assert.commandWorked(
    st.s.adminCommand({
        enableSharding: dbName,
        primaryShard: st.shard0.shardName,
    }),
);

// Value installed as reshardingCriticalSectionTimeoutMillis by the timeout test case.
const testCriticalSectionTimeoutMS = 1000;
// How long the failCommand fail point blocks a matching command: one hour.
const cmdBlockTimeoutMS = 60 * 60 * 1000;
// Counter used to give each test case its own collection name.
let testNum = 0;
|
|
|
|
/**
 * Turns on the "failCommand" fail point on 'node' so that the given command blocks its
 * connection for cmdBlockTimeoutMS, including when the command is sent internally.
 *
 * @param {Mongo} node - Connection to the node on which to enable the fail point.
 * @param {string} cmdName - Name of the command to block.
 * @param {number} numSkips - Value passed as the fail point's {skip: ...} mode; presumably the
 *     number of matching commands let through before blocking starts (see the usage notes at
 *     the call sites).
 * @returns {Object} The handle returned by configureFailPoint; callers use its wait()/off().
 */
function configureFailPointToBlockCommand(node, cmdName, numSkips) {
    const failCommandData = {
        failCommands: [cmdName],
        blockConnection: true,
        blockTimeMS: cmdBlockTimeoutMS,
        failInternalCommands: true,
    };
    const failPointMode = {skip: numSkips};
    return configureFailPoint(node, "failCommand", failCommandData, failPointMode);
}
|
|
|
|
/**
 * Verifies that moveCollection fails with ReshardingCriticalSectionTimeout when 'cmdName' is
 * blocked on the shard0 primary while the critical section timeout on the config server is
 * lowered to testCriticalSectionTimeoutMS.
 *
 * Cleanup (disabling the fail point and restoring the original timeout) runs in 'finally'
 * blocks so a failed assertion does not leave the blocking fail point or the shortened timeout
 * behind on the cluster.
 *
 * @param {string} cmdName - Name of the command to block on the shard0 primary.
 * @param {number} numSkips - Passed through to the fail point's {skip: ...} mode.
 */
function testCriticalSectionTimeoutWhileWaiting(cmdName, numSkips) {
    jsTest.log(`Test resharding critical section timeout while coordinator is waiting for responses for ${cmdName}`);

    // Each invocation works on its own freshly created collection.
    const collName = `testColl${testNum++}`;
    const ns = `${dbName}.${collName}`;
    const testColl = testDb.getCollection(collName);
    assert.commandWorked(testColl.createIndex({x: 1}));
    assert.commandWorked(
        testColl.insert([
            {_id: -1, x: -1, y: -1},
            {_id: 1, x: 1, y: 1},
        ]),
    );

    // setParameter reports the previous value in 'was'; keep it so it can be restored.
    const originalCriticalSectionTimeout = assert.commandWorked(
        configPrimary.adminCommand({
            setParameter: 1,
            reshardingCriticalSectionTimeoutMillis: testCriticalSectionTimeoutMS,
        }),
    ).was;

    try {
        const fp = configureFailPointToBlockCommand(shard0Primary, cmdName, numSkips);
        try {
            const moveThread = new Thread(runMoveCollection, st.s.host, ns, st.shard1.shardName);
            moveThread.start();
            moveThread.join();
            assert.commandFailedWithCode(moveThread.returnData(), ErrorCodes.ReshardingCriticalSectionTimeout);
        } finally {
            // Always release the blocked command, even if the assertion above threw.
            fp.off();
        }
    } finally {
        // Always put the original critical section timeout back on the config server.
        assert.commandWorked(
            configPrimary.adminCommand({
                setParameter: 1,
                reshardingCriticalSectionTimeoutMillis: originalCriticalSectionTimeout,
            }),
        );
    }
}
|
|
|
|
/**
 * Verifies that moveCollection fails with ReshardCollectionAborted when the operation is
 * aborted while 'cmdName' is blocked on the shard0 primary.
 *
 * The fail point is disabled in a 'finally' block so that a failure in fp.wait(), in the abort
 * command, or in the final assertion does not leave the command blocked on the shard.
 *
 * @param {string} cmdName - Name of the command to block on the shard0 primary.
 * @param {number} numSkips - Passed through to the fail point's {skip: ...} mode.
 */
function testAbortWhileWaiting(cmdName, numSkips) {
    jsTest.log(`Test aborting resharding while coordinator is waiting for responses for ${cmdName}`);

    // Each invocation works on its own freshly created collection.
    const collName = `testColl${testNum++}`;
    const ns = `${dbName}.${collName}`;
    const testColl = testDb.getCollection(collName);
    assert.commandWorked(testColl.createIndex({x: 1}));
    assert.commandWorked(
        testColl.insert([
            {_id: -1, x: -1, y: -1},
            {_id: 1, x: 1, y: 1},
        ]),
    );

    const fp = configureFailPointToBlockCommand(shard0Primary, cmdName, numSkips);
    try {
        const moveThread = new Thread(runMoveCollection, st.s.host, ns, st.shard1.shardName);
        moveThread.start();

        // Only abort once the fail point has been hit, i.e. the command is actually blocked.
        fp.wait();
        assert.commandWorked(st.s.adminCommand({abortMoveCollection: ns}));

        moveThread.join();
        assert.commandFailedWithCode(moveThread.returnData(), ErrorCodes.ReshardCollectionAborted);
    } finally {
        // Always release the blocked command, even if a step above threw.
        fp.off();
    }
}
|
|
|
|
// Commands to block on the shard0 primary, each with the number of initial occurrences that
// the fail point should skip before it starts blocking.
const cmdsToBlock = [
    // Skip the refreshes for transitioning to "cloning" and to "applying" to test the error
    // during critical section. This count assumes that _shardsvrReshardRecipientClone is
    // disabled.
    {cmdName: "_flushReshardingStateChange", numSkips: 2},
    // No skipping.
    {cmdName: "_shardsvrReshardingDonorFetchFinalCollectionStats", numSkips: 0},
    // No skipping.
    {cmdName: "_shardsvrReshardRecipientCriticalSectionStarted", numSkips: 0},
];

// Run both scenarios (critical section timeout, then abort) for every blocked command.
cmdsToBlock.forEach(({cmdName, numSkips}) => {
    testCriticalSectionTimeoutWhileWaiting(cmdName, numSkips);
    testAbortWhileWaiting(cmdName, numSkips);
});

st.stop();
|