mongo/jstests/replsets/drain.js

119 lines
5.0 KiB
JavaScript

// Test drain mode when transitioning to PRIMARY
// 1. Set up a 3-node set.
// 2. Prevent applying retrieved ops on the SECONDARY.
// 3. Insert data to ensure the SECONDARY has ops to apply in its queue.
// 4. Shutdown PRIMARY.
// 5. Wait for SECONDARY to become PRIMARY.
// 6. Confirm that the new PRIMARY cannot accept writes while in drain mode.
// 6a. Confirm that the new PRIMARY cannot accept reads while in drain mode.
// 7. Enable applying ops.
// 8. Ensure the ops in queue are applied and that the PRIMARY begins to accept writes as usual.
import {FeatureFlagUtil} from "jstests/libs/feature_flag_util.js";
import {ReplSetTest} from "jstests/libs/replsettest.js";
let replSet = new ReplSetTest({name: "testSet", nodes: 3});
let nodes = replSet.nodeList();
replSet.startSet();
replSet.initiate(
{
"_id": "testSet",
"members": [
{"_id": 0, "host": nodes[0]},
{"_id": 1, "host": nodes[1]},
{"_id": 2, "host": nodes[2], "arbiterOnly": true},
],
// No primary catch-up so we focus on the drain mode.
"settings": {"catchUpTimeoutMillis": 0},
},
null,
{initiateWithDefaultElectionTimeout: true},
);
let primary = replSet.getPrimary();
let secondary = replSet.getSecondary();
// The default WC is majority and rsSyncApplyStop failpoint will prevent satisfying any majority
// writes.
assert.commandWorked(
primary.adminCommand({setDefaultRWConcern: 1, defaultWriteConcern: {w: 1}, writeConcern: {w: "majority"}}),
);
// Do an initial insert to prevent the secondary from going into recovery
let numDocuments = 20;
let bulk = primary.getDB("foo").foo.initializeUnorderedBulkOp();
let bigString = Array(1024 * 1024).toString();
assert.commandWorked(primary.getDB("foo").foo.insert({big: bigString}));
replSet.awaitReplication();
assert.commandWorked(
secondary.getDB("admin").runCommand({configureFailPoint: "rsSyncApplyStop", mode: "alwaysOn"}),
"failed to enable fail point on secondary",
);
// Wait for Oplog Applier to hang on the failpoint.
checkLog.contains(secondary, "rsSyncApplyStop fail point enabled. Blocking until fail point is disabled");
const reduceMajorityWriteLatency = FeatureFlagUtil.isPresentAndEnabled(secondary, "ReduceMajorityWriteLatency");
let bufferCountBefore = reduceMajorityWriteLatency
? secondary.getDB("foo").serverStatus().metrics.repl.buffer.write.count
: secondary.getDB("foo").serverStatus().metrics.repl.buffer.count;
for (let i = 1; i < numDocuments; ++i) {
bulk.insert({big: bigString});
}
assert.commandWorked(bulk.execute());
jsTestLog("Number of documents inserted into collection on primary: " + numDocuments);
assert.eq(numDocuments, primary.getDB("foo").foo.find().itcount());
assert.soon(
function () {
let serverStatus = secondary.getDB("foo").serverStatus();
let bufferCount = reduceMajorityWriteLatency
? serverStatus.metrics.repl.buffer.write.count
: serverStatus.metrics.repl.buffer.count;
let bufferCountChange = bufferCount - bufferCountBefore;
jsTestLog("Number of operations buffered on secondary since stopping applier: " + bufferCountChange);
return bufferCountChange >= numDocuments - 1;
},
"secondary did not buffer operations for new inserts on primary",
300000,
1000,
);
// Kill primary; secondary will enter drain mode to catch up
primary.getDB("admin").shutdownServer({force: true});
replSet.waitForState(secondary, ReplSetTest.State.PRIMARY);
// Ensure new primary is not yet writable
jsTestLog("New primary should not be writable yet");
assert.writeError(secondary.getDB("foo").flag.insert({sentinel: 2}));
assert(!secondary.getDB("admin").runCommand({"hello": 1}).isWritablePrimary);
// Ensure new primary is not yet readable without secondaryOk bit.
secondary.setSecondaryOk(false);
jsTestLog("New primary should not be readable yet, without secondaryOk bit");
let res = secondary.getDB("foo").runCommand({find: "foo"});
assert.commandFailed(res);
assert.eq(ErrorCodes.NotPrimaryNoSecondaryOk, res.code, "find failed with unexpected error code: " + tojson(res));
// Nor should it be readable with the secondaryOk bit.
secondary.setSecondaryOk();
assert.commandWorked(secondary.getDB("foo").runCommand({find: "foo"}));
assert(!secondary.adminCommand({"hello": 1}).isWritablePrimary);
// Allow draining to complete
jsTestLog("Disabling fail point on new primary to allow draining to complete");
assert.commandWorked(
secondary.getDB("admin").runCommand({configureFailPoint: "rsSyncApplyStop", mode: "off"}),
"failed to disable fail point on new primary",
);
primary = replSet.getPrimary();
// Ensure new primary is writable
jsTestLog("New primary should be writable after draining is complete");
assert.commandWorked(primary.getDB("foo").flag.insert({sentinel: 1}));
// Check for at least two entries. There was one prior to freezing op application on the
// secondary and we cannot guarantee all writes reached the secondary's op queue prior to
// shutting down the original primary.
assert.gte(primary.getDB("foo").foo.find().itcount(), 2);
replSet.stopSet();