mongo/jstests/hooks/run_inject_hello_failures.js

165 lines
6.0 KiB
JavaScript

import newMongoWithRetry from "jstests/libs/retryable_mongo.js";
// Conversion factor used to express the millisecond settings below in seconds.
const kMillisPerSecond = 1000;

// Pause at the end of every test loop iteration; also used as the back-off when the cluster
// nodes cannot be fetched.
const kTestLoopPeriodMs = 20 * kMillisPerSecond;

// Sleep intended to be injected into the Hello response on the server side.
// NOTE(review): nothing visible in this file reads this constant - confirm it is still needed.
const kInjectedHelloDelayMs = 6000 * kMillisPerSecond;

// Number of times the fail inject - stepdown cycle is repeated.
const kTestLoops = 1;

// Value handed to the refresh period failpoint as its 'period' (seconds, going by the name).
const kRefreshTimeoutSec = 1;
// Returns the "admin" database reachable through 'connection', which could be a connection to
// 'mongos' or 'mongod'. Objects exposing getDB() are asked directly; anything else has to expose
// getSiblingDB(), otherwise the assertion below fails.
function getAdminDB(connection) {
    if (typeof connection.getDB === "function") {
        return connection.getDB("admin");
    }

    const hasSiblingAccessor = typeof connection.getSiblingDB === "function";
    assert(hasSiblingAccessor, `Cannot get Admin DB from ${tojson(connection)}`);
    return connection.getSiblingDB("admin");
}
// Forces the node behind 'connection' to step down by running replSetStepDown with 'force: true'.
//
// Outcomes:
//  - The command succeeds: the result is logged and the function returns.
//  - The command throws, or replies with a failure, that carries no error code or carries
//    HostUnreachable: this is treated as transient (presumably the connection dropping while the
//    node steps down), logged, and tolerated.
//  - Any other exception is rethrown unchanged, and any other failed reply fails
//    assert.commandWorked().
function stepDown(connection) {
    jsTestLog(`Force stepDown to ${connection}`);
    const adminDB = getAdminDB(connection);

    // '== null' deliberately matches both a missing and a null 'code'.
    const isTransient = (failure) => failure.code == null || failure.code === ErrorCodes.HostUnreachable;

    let res;
    try {
        res = adminDB.runCommand({replSetStepDown: 10, force: true, secondaryCatchUpPeriodSecs: 8});
    } catch (e) {
        jsTestLog(`Step down error is usually normal: ${e}`);
        if (e && isTransient(e)) {
            jsTestLog(`Transient error ${e}`);
            return;
        }
        // Surface the real failure instead of a generic "command failed" on an undefined reply.
        throw e;
    }

    // A successful reply has no 'code', so it must be recognised before the transient check;
    // otherwise success would be reported as a transient error.
    if (res && res.ok) {
        jsTestLog(`Forced step down to ${connection}, result ${tojson(res)}`);
        return;
    }
    if (res && isTransient(res)) {
        jsTestLog(`Transient error ${tojson(res)}`);
        return;
    }
    assert.commandWorked(res);
}
// Makes the node behind 'connection' step up by retrying replSetStepUp through
// assert.soonNoExcept() until a reply with a truthy 'ok' is returned. Each failed reply is logged
// in full via tojson() so the reason for the retry is visible in the test log.
function stepUp(connection) {
    const adminDB = getAdminDB(connection);
    assert.soonNoExcept(() => {
        const res = adminDB.runCommand({replSetStepUp: 1});
        if (!res.ok) {
            // Plain interpolation of the reply object would not show any of its fields.
            jsTestLog(`Failed to step up with ${tojson(res)}`);
        }
        return res.ok;
    }, "Failed to step up");
    jsTestLog(`Forced step up to ${connection}`);
}
// RSM refresh cycles are 30 sec apart by default, which is too long for this test. This turns on
// the 'modifyReplicaSetMonitorDefaultRefreshPeriod' failpoint on 'connection' with a period of
// 'kRefreshTimeoutSec', then reads the failpoint back and asserts that its mode is 1.
function injectReduceRefreshPeriod(connection) {
    jsTestLog(`Reduce refresh interval for ${connection}`);
    const adminDB = getAdminDB(connection);

    const enableFailPoint = {
        configureFailPoint: "modifyReplicaSetMonitorDefaultRefreshPeriod",
        mode: "alwaysOn",
        data: {period: kRefreshTimeoutSec},
    };
    assert.commandWorked(adminDB.runCommand(enableFailPoint));

    const parameterName = "failpoint.modifyReplicaSetMonitorDefaultRefreshPeriod";
    const failPointState = adminDB.runCommand({getParameter: 1, [parameterName]: 1});
    assert.commandWorked(failPointState);
    assert.eq(failPointState[parameterName].mode, 1);
}
// Turns on the 'shardWaitInHello' failpoint on 'connection', then reads the failpoint back and
// asserts that its mode is 1.
function injectHelloFail(connection) {
    jsTestLog(`Inject Hello fail to connection ${connection}`);
    const adminDB = getAdminDB(connection);

    const enableFailPoint = {
        configureFailPoint: "shardWaitInHello",
        mode: "alwaysOn",
        // No effect if client is mongo shell.
        data: {internalClient: 1},
    };
    assert.commandWorked(adminDB.runCommand(enableFailPoint));

    const parameterName = "failpoint.shardWaitInHello";
    const failPointState = adminDB.runCommand({getParameter: 1, [parameterName]: 1});
    assert.commandWorked(failPointState);
    assert.eq(failPointState[parameterName].mode, 1);
}
// Runs replSetFreeze with a value of 20 against the admin database of 'connection' and asserts
// that the command worked. (Per the replSetFreeze command reference, the value is the number of
// seconds during which the member will not seek election.)
function freeze(connection) {
    const freezeCommand = {replSetFreeze: 20};
    const reply = getAdminDB(connection).runCommand(freezeCommand);
    assert.commandWorked(reply);
}
// Opens a connection to the config server of the cluster reachable through 'connection'.
//
// The host list is taken from 'sharding.configsvrConnectionString' in the serverStatus reply:
// everything after the last "/" of that string is handed to newMongoWithRetry(), whose result is
// returned.
//
// Throws when serverStatus fails, or when the reply carries no connection string containing a
// "/" (for example when 'connection' does not report a 'sharding' section).
function getConfigServer(connection) {
    const adminDB = getAdminDB(connection);
    const status = assert.commandWorked(adminDB.runCommand({serverStatus: 1}));
    const connectionString = status.sharding ? status.sharding.configsvrConnectionString : undefined;

    // Greedy prefix: the capture group holds the text following the last "/".
    const match = typeof connectionString === "string" ? /.*\/(.*)/.exec(connectionString) : null;
    if (match === null) {
        throw new Error(
            `Cannot extract config server hosts from configsvrConnectionString ${tojson(connectionString)}`,
        );
    }

    const hosts = match[1];
    jsTestLog(`Config server: ${hosts} extracted from ${tojson(connectionString)}`);
    return newMongoWithRetry(hosts);
}
// Runs the fail injection cycle 'kTestLoops' times against the cluster reachable through 'db'.
//
// One cycle fetches the primaries, the secondaries of every replica set and the config server,
// shortens the refresh period on all of them, enables the Hello failpoint on the primaries, steps
// the primaries down (freezing each one), and steps up one secondary per replica set. When
// fetching the nodes throws, the cycle is skipped. Either way the iteration ends with a sleep of
// 'kTestLoopPeriodMs'.
function doFailInjectionLoop(db) {
    // Wait between shortening the refresh period and injecting the Hello failure.
    const kRefreshSettleMs = 25 * 1000;

    for (let iteration = 0; iteration < kTestLoops; ++iteration) {
        let primaries;
        let secondariesPerReplicaSet;
        let configServer;
        let isTopologyKnown = false;
        try {
            primaries = FixtureHelpers.getPrimaries(db);
            secondariesPerReplicaSet = Array.from(FixtureHelpers.getAllReplicas(db), (replicaSet) =>
                replicaSet.getSecondaries(),
            );
            configServer = getConfigServer(db);
            isTopologyKnown = true;
        } catch (e) {
            jsTestLog(`Cannot fetch primaries or secondaries: ${e}`);
        }

        if (isTopologyKnown) {
            // The refresh timeout is reduced on mongos and on the config server as well.
            injectReduceRefreshPeriod(db);
            injectReduceRefreshPeriod(configServer);
            const dataNodes = primaries.concat(FixtureHelpers.getSecondaries(db));
            dataNodes.forEach((node) => injectReduceRefreshPeriod(node));

            // Tests usually put a 10-20 sec timeout on operations, while the default refresh
            // period is 30 sec. Refreshes scheduled before the period was reduced must get close
            // to firing before the Hello delay failure is injected.
            sleep(kRefreshSettleMs);

            primaries.forEach((primary) => injectHelloFail(primary));

            primaries.forEach((primary) => {
                stepDown(primary);
                freeze(primary);
            });

            // For each replica set pick one secondary.
            secondariesPerReplicaSet.forEach((secondaries) => {
                if (secondaries.length > 0) {
                    stepUp(secondaries[0]);
                }
            });
        }

        sleep(kTestLoopPeriodMs);
    }
}
import {Topology, DiscoverTopology} from "jstests/libs/discover_topology.js";
import {FixtureHelpers} from "jstests/libs/fixture_helpers.js";
// Hook entry point. Requires the shell's global 'db', logs the discovered topology together with
// the getCmdLineOpts reply, and runs the fail injection loop only when the topology is a sharded
// cluster.
assert.eq(typeof db, "object", "Invalid `db` object, is the shell connected to a mongod?");

const cmdLineOpts = db.adminCommand("getCmdLineOpts");
const topology = DiscoverTopology.findConnectedNodes(db.getMongo());

const serializedTopology = JSON.stringify(topology);
const serializedCmdLineOpts = JSON.stringify(cmdLineOpts);
jsTestLog(`Run Hello fail injection in ${serializedTopology},
Invoked with ${serializedCmdLineOpts},
topology type ${topology.type}`);

const isShardedCluster = topology.type === Topology.kShardedCluster;
if (isShardedCluster) {
    doFailInjectionLoop(db);
}

jsTestLog(`Hello fail hook completed`);