// Source: mirror of https://github.com/mongodb/mongo (JavaScript, 183 lines, 6.7 KiB)
/*
 * Tests that the checkMetadataConsistency command can be interrupted at important points,
 * and that this interruption propagates to its subcommands.
 *
 * @tags: [
 *   does_not_support_stepdowns,
 * ]
 */

import {configureFailPoint} from "jstests/libs/fail_point_util.js";
|
|
import {assertCommandFailedWithCodeInParallelShell} from "jstests/libs/parallel_shell_helpers.js";
|
|
import {ShardingTest} from "jstests/libs/shardingtest.js";
|
|
|
|
/**
 * Finds the opid of the (unique) command matching a filter over `$currentOp`.
 *
 * Runs a `$currentOp` aggregation (with `localOps: true`) on the "admin" database of `conn` and
 * narrows its output with a `$match` stage built from `cmdFilter`.
 *
 * @param conn - Connection whose "admin" database is used to run the aggregation.
 * @param cmdFilter - Predicate placed in the `$match` stage that follows `$currentOp`.
 * @returns The `opid` of the single matching operation, or `null` when no operation matches or
 *     the matching entry carries no `opid`.
 * @throws If more than one operation matches `cmdFilter`.
 */
function tryFindOpid(conn, cmdFilter) {
    const pipeline = [{$currentOp: {localOps: true}}, {$match: cmdFilter}];
    const matchingOps = conn.getDB("admin").aggregate(pipeline).toArray();

    // Callers rely on the filter identifying at most one operation; anything else is a test bug.
    assert(
        matchingOps.length <= 1,
        `Ambiguous match for command matching ${tojsononeline(cmdFilter)}, found: ${tojson(matchingOps)}`,
    );

    if (matchingOps.length === 0) {
        return null;
    }

    // Normalize a missing (undefined) opid to null so callers only need a single null check.
    const opid = matchingOps[0].opid;
    return opid != null ? opid : null;
}
|
|
|
|
/**
 * Strict variant of `tryFindOpid`: returns the opid of the (unique) command matching
 * `cmdFilter`, and fails if no such command is found.
 *
 * @param conn - Connection forwarded to `tryFindOpid`.
 * @param cmdFilter - Filter forwarded to `tryFindOpid`.
 * @returns The opid reported by `tryFindOpid` (never `null`).
 * @throws If `tryFindOpid` returns `null`, or if `tryFindOpid` itself throws.
 */
function findOpid(conn, cmdFilter) {
    const opid = tryFindOpid(conn, cmdFilter);
    assert(opid != null, `Failed to find command matching ${tojsononeline(cmdFilter)}`);
    return opid;
}
|
|
|
|
/**
 * Runs a command and checks that when it is killed while hung at fail point `hangFailPointName`,
 * it gets interrupted before it reaches fail point `deadlineFailPointName`.
 *
 * The command is launched in a parallel shell with `comment: jsTestName()` attached, which is
 * what allows it to be located through `$currentOp` and killed.
 *
 * @param conn - Connection on which both fail points are configured, the command is run, and
 *     `killOp` is issued.
 * @param {{dbName: string, command: Object}} target - Database name to run against and the
 *     command document to run.
 * @param hangFailPointName - Fail point at which the command is paused until it has been killed.
 * @param deadlineFailPointName - Fail point the command must not reach after being killed.
 */
function assertCommandInterruptsBetweenFailPoints(conn, {dbName, command}, hangFailPointName, deadlineFailPointName) {
    jsTestLog(
        `Checking that ${tojsononeline(command)} over db=${dbName}` +
            ` is interruptable between ${hangFailPointName} and ${deadlineFailPointName}`,
    );

    // Configure the fail points and launch the command.
    const hangFailPoint = configureFailPoint(conn, hangFailPointName);
    const deadlineFailPoint = configureFailPoint(conn, deadlineFailPointName);
    const awaitCommandInterrupted = assertCommandFailedWithCodeInParallelShell(
        conn,
        conn.getDB(dbName),
        {...command, comment: jsTestName()},
        ErrorCodes.Interrupted,
    );

    // Kill the command after the hang fail point is hit.
    hangFailPoint.wait();
    conn.getDB("admin").killOp(findOpid(conn, {"command.comment": jsTestName()}));

    // Release the command and verify that it got interrupted.
    hangFailPoint.off();
    awaitCommandInterrupted();

    // We can disable the deadline fail point now that the command finished.
    // If the command reached the deadline, the server will `tassert`, making the test fail.
    deadlineFailPoint.off();
}
|
|
|
|
/**
 * Runs a command, which is assumed to run other sub-commands.
 * At fail point `hangFailPointName` inside the sub-command, the main command is killed.
 * Checks that the sub-command is also killed before it reaches fail point `deadlineFailPointName`.
 *
 * The main command is launched in a parallel shell with `comment: jsTestName()` attached, which
 * is what allows it to be located through `$currentOp` on `conn` and killed.
 *
 * @param conn - Connection on which the main command is run and killed.
 * @param {{dbName: string, command: Object}} target - Database name to run against and the
 *     command document to run.
 * @param subConn - Connection on which both fail points are configured and on which the
 *     sub-command is looked up.
 * @param subCmdFilter - `$currentOp` filter that identifies the (unique) sub-command on `subConn`.
 * @param hangFailPointName - Fail point at which the sub-command is paused.
 * @param deadlineFailPointName - Fail point the sub-command must not reach after being killed.
 */
function assertSubCommandKilledAndInterruptsBetweenFailPoints(
    conn,
    {dbName, command},
    subConn,
    subCmdFilter,
    hangFailPointName,
    deadlineFailPointName,
) {
    jsTestLog(
        `Checking that ${tojsononeline(command)} over ${dbName}` +
            ` kills the subcommand matching ${tojsononeline(subCmdFilter)}` +
            ` between ${hangFailPointName} and ${deadlineFailPointName}`,
    );

    // Configure the fail points and launch the main command.
    const hangFailPoint = configureFailPoint(subConn, hangFailPointName);
    const deadlineFailPoint = configureFailPoint(subConn, deadlineFailPointName);
    const awaitCommandInterrupted = assertCommandFailedWithCodeInParallelShell(
        conn,
        conn.getDB(dbName),
        {...command, comment: jsTestName()},
        ErrorCodes.Interrupted,
    );

    // Kill the top-level command after the hang fail point is hit and check for interruption.
    hangFailPoint.wait();
    conn.getDB("admin").killOp(findOpid(conn, {"command.comment": jsTestName()}));
    awaitCommandInterrupted();

    // Release the subcommand and wait for it to be killed.
    // The subcommand is still parked on the hang fail point here, so first wait until it is
    // reported as `killPending`, and only then let it proceed and wait for it to disappear.
    const opid = findOpid(subConn, subCmdFilter);
    assert.soon(() => tryFindOpid(subConn, {opid: opid, killPending: true}) != null);
    hangFailPoint.off();
    assert.soon(() => tryFindOpid(subConn, {opid: opid}) == null);

    // We can disable the deadline fail point now that the command finished.
    // If the command reached the deadline, the server will `tassert`, making the test fail.
    deadlineFailPoint.off();
}
|
|
|
|
// Set up a database with a sharded collection to run checkMetadataConsistency on.
const st = new ShardingTest({shards: 2});

const kDbName = jsTestName();
const kCollName = "coll";
assert.commandWorked(st.s.adminCommand({enableSharding: kDbName, primaryShard: st.shard0.shardName}));
st.shardColl(st.s.getDB(kDbName).getCollection(kCollName), {skey: 1});

// Run tests for checkMetadataConsistency on a cluster/DB/collection level.
const checkMetadataForCluster = {
    dbName: "admin",
    command: {checkMetadataConsistency: 1},
};
const checkMetadataForDb = {
    dbName: kDbName,
    command: {checkMetadataConsistency: 1},
};
const checkMetadataForColl = {
    dbName: kDbName,
    command: {checkMetadataConsistency: kCollName},
};

// `$currentOp` filter identifying the shard-side subcommand spawned by checkMetadataConsistency.
const kShardsvrCheckMetadataFilter = {"command._shardsvrCheckMetadataConsistency": {$exists: true}};

for (const cmc of [checkMetadataForCluster, checkMetadataForDb, checkMetadataForColl]) {
    // Check that checkMetadataConsistency can be interrupted while establishing cursors on all
    // shards that are the primary shard for one or more databases.
    assertCommandInterruptsBetweenFailPoints(
        st.s,
        cmc,
        "hangCheckMetadataBeforeEstablishCursors",
        "tripwireCheckMetadataAfterEstablishCursors",
    );

    // Check that _shardsvrCheckMetadataConsistency can be interrupted while taking the DDL lock,
    // which is the main bottleneck when there are concurrent DDL operations.
    assertSubCommandKilledAndInterruptsBetweenFailPoints(
        st.s,
        cmc,
        st.rs0.getPrimary(),
        kShardsvrCheckMetadataFilter,
        "hangShardCheckMetadataBeforeDDLLock",
        "tripwireShardCheckMetadataAfterDDLLock",
    );

    // Check that _shardsvrCheckMetadataConsistency can be interrupted while establishing cursors
    // on all database participants. Since each database is checked sequentially, guaranteeing
    // interruptability here ensures that the command stops working on further databases.
    assertSubCommandKilledAndInterruptsBetweenFailPoints(
        st.s,
        cmc,
        st.rs0.getPrimary(),
        kShardsvrCheckMetadataFilter,
        "hangShardCheckMetadataBeforeEstablishCursors",
        "tripwireShardCheckMetadataAfterEstablishCursors",
    );
}

st.stop();
|