mirror of https://github.com/mongodb/mongo
269 lines
11 KiB
JavaScript
269 lines
11 KiB
JavaScript
/**
 * Run a query on a sharded cluster where one of the shards hangs. Running killCursors on the mongos
 * should always succeed.
 *
 * Uses getMore to pin an open cursor.
 * @tags: [
 *   requires_getmore,
 * ]
 */
|
|
|
|
import {waitForCurOpByFailPoint} from "jstests/libs/curop_helpers.js";
|
|
import {ShardingTest} from "jstests/libs/shardingtest.js";
|
|
|
|
// This test manually simulates a session, which is not compatible with implicit sessions.
TestData.disableImplicitSessions = true;

// Failpoint enabled on each shard so that a getMore hangs there while its cursor is pinned.
const kFailPointName = "waitAfterPinningCursorBeforeGetMoreBatch";
// NOTE(review): presumably 'shouldCheckForInterrupt' makes the hanging operation notice kills
// while it waits on the failpoint - confirm against the failpoint's server-side implementation.
const kFailpointOptions = {
    shouldCheckForInterrupt: true,
};
// Comment attached to the find command; later used to locate the matching operations in
// $currentOp output.
const kCommandCommentString = "kill_pinned_cursor_js_test";

const st = new ShardingTest({shards: 2});
const kDBName = "test";
const mongosDB = st.s.getDB(kDBName);
const shard0DB = st.shard0.getDB(kDBName);
const shard1DB = st.shard1.getDB(kDBName);

// Fail fast if the database cannot be set up as requested, rather than letting a broken
// setup surface later as a confusing failure.
assert.commandWorked(st.s.adminCommand({enablesharding: kDBName, primaryShard: st.shard0.name}));

const coll = mongosDB.jstest_kill_pinned_cursor;

// Insert documents with _id 0 through 9.
for (let i = 0; i < 10; i++) {
    assert.commandWorked(coll.insert({_id: i}));
}

// NOTE(review): presumably shards the collection on {_id: 1}, splits at {_id: 5} and moves the
// chunk containing {_id: 6} so that both shards own documents - confirm against
// ShardingTest.shardColl.
st.shardColl(coll, {_id: 1}, {_id: 5}, {_id: 6}, kDBName, false);
|
|
|
|
/**
 * Builds the source text that a parallel shell runs in order to issue the getMore.
 *
 * The startParallelShell function serializes whatever it is given into a string, so it cannot
 * be handed a function that captures variables. Instead, the values we would like to capture
 * are written into the generated source as literal declarations, followed by the stringified
 * function wrapped in an immediately-invoked expression. Kudos to Dave Storch for coming up
 * with this idea.
 *
 * Side effect: stores 'getMoreErrCodes' (and 'sessionId' when 'useSession' is set) on TestData,
 * which is where the generated code reads them back from.
 *
 * @param cursorId - Cursor id for the getMore; spliced in via its toString() output.
 * @param getMoreErrCodes - Error code(s) with which the getMore is expected to fail.
 * @param {boolean} useSession - Whether the getMore runs with 'sessionId' as its lsid.
 * @param sessionId - Session id to attach; only stored when 'useSession' is set.
 * @returns {string} Source text to pass to startParallelShell.
 */
function makeParallelShellFunctionString(cursorId, getMoreErrCodes, useSession, sessionId) {
    // Declarations standing in for the variables the function below would otherwise capture.
    const capturedDeclarations = [
        `const cursorId = ${cursorId.toString()};`,
        `const kDBName = "${kDBName}";`,
        `let collName = "${coll.getName()}";`,
        `const useSession = ${useSession};`,
    ].join("");

    TestData.getMoreErrCodes = getMoreErrCodes;
    if (useSession) {
        TestData.sessionId = sessionId;
    }

    // Only ever used through toString(); 'cursorId', 'collName' and 'useSession' resolve to the
    // declarations generated above when the parallel shell evaluates the text.
    const issueGetMore = function () {
        // eslint-disable-next-line
        const cmd = {getMore: cursorId, collection: collName, batchSize: 4};
        if (useSession) {
            cmd.lsid = TestData.sessionId;
        }

        // The getMore is expected to be interrupted and to fail.
        const getMoreRes = db.runCommand(cmd);
        assert.commandFailedWithCode(getMoreRes, TestData.getMoreErrCodes);

        if (useSession) {
            assert.commandWorked(db.adminCommand({endSessions: [TestData.sessionId]}));
        }
    };

    return `${capturedDeclarations}(${issueGetMore.toString()})();`;
}
|
|
|
|
/**
 * Tests that the various cursors involved in a sharded query can be killed, even when pinned.
 *
 * Sets up a sharded cursor, opens a mongos cursor, and uses failpoints to cause the mongos
 * cursor to hang due to getMore commands hanging on each of the shards. Then invokes
 * 'killFunc', and verifies the cursors on the shards and the mongos cursor get cleaned up.
 *
 * @param {Object} options
 * @param {function} options.killFunc - Called as killFunc(cursorId, sessionId) once the getMore
 *     is hanging on both shards; it must kill the sharded query. 'sessionId' is undefined when
 *     'useSession' is false.
 * @param options.getMoreErrCodes - The error code(s) with which we expect the getMore to fail
 *     (e.g. a killCursors command should cause getMore to fail with "CursorKilled", but killOp
 *     should cause a getMore to fail with "Interrupted").
 * @param {boolean} options.useSession - Whether the find and getMore run inside a manually
 *     started session.
 */
function testShardedKillPinned({killFunc, getMoreErrCodes, useSession}) {
    const shardDBs = [shard0DB, shard1DB];
    // Non-null only while a parallel shell has been started and still has to be joined.
    let getMoreJoiner = null;

    try {
        // Set up the mongods to hang on a getMore request. ONLY set the failpoint on the
        // mongods. Setting the failpoint on the mongos will only cause it to spin, and not
        // actually send any requests out.
        for (const shardDB of shardDBs) {
            assert.commandWorked(
                shardDB.adminCommand({configureFailPoint: kFailPointName, mode: "alwaysOn", data: kFailpointOptions}),
            );
        }

        // Run a find against mongos. This should open cursors on both of the shards.
        const findCmd = {find: coll.getName(), batchSize: 2, comment: kCommandCommentString};

        let sessionId;
        if (useSession) {
            // Manually start a session so it can be continued from inside a parallel shell.
            sessionId = assert.commandWorked(mongosDB.adminCommand({startSession: 1})).id;
            findCmd.lsid = sessionId;
        }

        const findRes = mongosDB.runCommand(findCmd);
        assert.commandWorked(findRes);
        const cursorId = findRes.cursor.id;
        assert.neq(cursorId, NumberLong(0));

        const parallelShellFn = makeParallelShellFunctionString(cursorId, getMoreErrCodes, useSession, sessionId);
        getMoreJoiner = startParallelShell(parallelShellFn, st.s.port);

        // Wait until we know the mongod cursors are pinned.
        const curOpFilter = {"command.comment": kCommandCommentString};
        for (const shardDB of shardDBs) {
            waitForCurOpByFailPoint(shardDB, coll.getFullName(), kFailPointName, curOpFilter);
        }

        // Use the function provided by the caller to kill the sharded query.
        killFunc(cursorId, sessionId);

        // The getMore should finish now that we've killed the cursor (even though the failpoint
        // is still enabled). Clear the joiner before calling it so that a failing join is not
        // attempted a second time by the cleanup below.
        const joinGetMoreShell = getMoreJoiner;
        getMoreJoiner = null;
        joinGetMoreShell();

        // By now, the getMore run against the mongos has returned with an indication that the
        // cursor has been killed. Verify that the cursor is really gone by running a
        // killCursors command, and checking that the cursor is reported as "not found".
        const killRes = mongosDB.runCommand({killCursors: coll.getName(), cursors: [cursorId]});
        assert.commandWorked(killRes);
        assert.eq(killRes.cursorsAlive, []);
        assert.eq(killRes.cursorsNotFound, [cursorId]);
        assert.eq(killRes.cursorsUnknown, []);

        // Eventually the cursors on the mongods should also be cleaned up. They should be
        // killed by mongos when the mongos cursor gets killed.
        for (const shardDB of shardDBs) {
            const adminDB = shardDB.getSiblingDB("admin");
            assert.soon(
                () => adminDB.aggregate([{$currentOp: {idleCursors: true}}, {$match: curOpFilter}]).itcount() === 0,
                // On timeout, dump every active operation and idle cursor on this shard.
                () => "assert.soon failed: " + tojson(adminDB.aggregate([{$currentOp: {idleCursors: true}}]).toArray()),
            );
        }
    } finally {
        // Attempt every cleanup step even if an earlier one throws: a failure to disable the
        // failpoint on one shard must not leave the other shard's failpoint enabled or the
        // parallel shell unjoined.
        const cleanupSteps = shardDBs.map(
            (shardDB) => () =>
                assert.commandWorked(shardDB.adminCommand({configureFailPoint: kFailPointName, mode: "off"})),
        );
        if (getMoreJoiner) {
            const joinGetMoreShell = getMoreJoiner;
            cleanupSteps.push(() => joinGetMoreShell());
        }

        const cleanupErrors = [];
        for (const step of cleanupSteps) {
            try {
                step();
            } catch (err) {
                cleanupErrors.push(err);
            }
        }

        if (cleanupErrors.length > 0) {
            // Only one error can propagate; log the rest so they are not lost.
            for (const suppressed of cleanupErrors.slice(1)) {
                print(`Additional failure while cleaning up testShardedKillPinned: ${suppressed}`);
            }
            throw cleanupErrors[0];
        }
    }
}
|
|
|
|
// Returns the single getMore operation on shard0 that carries this test's comment, as reported
// by $currentOp. Asserts that exactly one such operation exists.
function findPinnedGetMoreOnShard0() {
    const pipeline = [
        {$currentOp: {}},
        {
            $match: {
                "command.getMore": {$exists: true},
                "command.comment": kCommandCommentString,
            },
        },
    ];
    const matchingOps = shard0DB.getSiblingDB("admin").aggregate(pipeline).toArray();
    assert.eq(1, matchingOps.length, matchingOps);
    return matchingOps[0];
}

// Runs killCursors against the mongos cursor. Verifies that the cursor is reported as killed
// successfully, and does not hang or return a "CursorInUse" error.
const killMongosCursor = (mongosCursorId) => {
    const killRes = mongosDB.runCommand({killCursors: coll.getName(), cursors: [mongosCursorId]});
    assert.commandWorked(killRes);
    assert.eq(killRes.cursorsKilled, [mongosCursorId]);
    assert.eq(killRes.cursorsAlive, []);
    assert.eq(killRes.cursorsNotFound, []);
    assert.eq(killRes.cursorsUnknown, []);
};

// Ignores the mongos cursor id, since it instead uses currentOp to obtain an op id to kill.
const killOpOnShard0 = () => {
    const pinnedGetMore = findPinnedGetMoreOnShard0();
    assert.commandWorked(shard0DB.killOp(pinnedGetMore.opid));
};

// Ignores the mongos cursor id, since it instead uses currentOp to obtain the cursor id of one
// of the shard cursors.
const killCursorOnShard0 = () => {
    const shardCursorId = findPinnedGetMoreOnShard0().command.getMore;
    const killRes = shard0DB.runCommand({killCursors: coll.getName(), cursors: [shardCursorId]});
    assert.commandWorked(killRes);
    assert.eq(killRes.cursorsKilled, [shardCursorId]);
    assert.eq(killRes.cursorsAlive, []);
    assert.eq(killRes.cursorsNotFound, []);
    assert.eq(killRes.cursorsUnknown, []);
};

[true, false].forEach((useSession) => {
    // Running 'killCursors' against a pinned mongos cursor (with pinned mongod cursors) must
    // correctly clean up all of the involved cursors.
    testShardedKillPinned({
        killFunc: killMongosCursor,
        getMoreErrCodes: ErrorCodes.CursorKilled,
        useSession,
    });

    // Running killOp against one of the cursors pinned on mongod must cause all involved
    // cursors to be killed.
    testShardedKillPinned({
        killFunc: killOpOnShard0,
        getMoreErrCodes: ErrorCodes.Interrupted,
        useSession,
    });

    // Running killCursors against one of the cursors pinned on mongod must cause all involved
    // cursors to be killed.
    testShardedKillPinned({
        killFunc: killCursorOnShard0,
        getMoreErrCodes: ErrorCodes.CursorKilled,
        useSession,
    });
});
|
|
|
|
// Running killSessions on the session which is running the getMore must cause the cursor to be
// killed.
//
// The kill function ignores the mongos cursor id, since it instead uses the session id shared by
// the original find command and the getMore.
const killOwningSession = (_cursorId, sessionId) => {
    const killSessionsRes = mongosDB.runCommand({killSessions: [sessionId]});
    assert.commandWorked(killSessionsRes);
};

// Killing a session on mongos kills all matching remote cursors (through KillCursors) then
// all matching local operations (through KillOp), so the getMore can fail with either
// CursorKilled or Interrupted depending on which response is returned first.
const killSessionsErrCodes = [ErrorCodes.CursorKilled, ErrorCodes.Interrupted];

testShardedKillPinned({
    killFunc: killOwningSession,
    getMoreErrCodes: killSessionsErrCodes,
    useSession: true,
});

st.stop();
|