/**
 * This test simulates workflows which are performed by the Atlas automation agent, where nodes are
 * created or restarted using file system snapshots.
 *
 * @tags: [requires_persistence]
 */

// Set up a standard 3-node replica set. Note the two secondaries are priority 0; this is
// different from the real Atlas configuration, where the secondaries would be electable.
// Snapshots are supported only on enterprise builds, so skip the test elsewhere.
if (!buildInfo()["modules"].includes("enterprise")) {
    printjson(buildInfo()["modules"]);
    jsTestLog("Skipping snapshot tests because not running on enterprise.");
    quit();
}

import {backupData} from "jstests/libs/backup_utils.js";
import {ReplSetTest} from "jstests/libs/replsettest.js";

const testName = TestData.testName;
const rst = new ReplSetTest({
    name: testName,
    nodes: [{}, {rsConfig: {priority: 0}}, {rsConfig: {priority: 0}}],
    useBridge: true,
    // We shorten the election timeout period so the tests with an unhealthy set run and recover
    // faster.
    settings: {electionTimeoutMillis: 2000, heartbeatIntervalMillis: 400},
});
rst.startSet();
rst.initiate();
// Add some data.
const primary = rst.getPrimary();
const testDb = primary.getDB("test");
assert.commandWorked(testDb[testName].insert([{a: 1}, {b: 2}, {c: 3}]));
rst.awaitReplication();

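// Helpers for simulating network partitions. Because the set runs with 'useBridge', each node is
// fronted by a mongobridge proxy, so disconnect()/reconnect() sever and restore the links between
// specific pairs of nodes without restarting any of them.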
function disconnectSecondaries(secondariesDown) {
    for (let i = 1; i <= secondariesDown; i++) {
        for (const node of rst.nodes) {
            if (node !== rst.nodes[i]) {
                node.disconnect(rst.nodes[i]);
            }
        }
    }
}

function reconnectSecondaries() {
    for (const node of rst.nodes) {
        for (const node2 of rst.nodes) {
            if (node2 !== node) {
                node2.reconnect(node);
            }
        }
    }
}

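// Simulates the Atlas "add a node from snapshot" workflow: snapshot the primary, seed a new
// member's dbpath from that snapshot, add the member with zero votes, wait for it to catch up, and
// then reconfigure it to be a voting member. 'secondariesDown' controls how many secondaries are
// partitioned away; when a majority is down, the reconfigs must use {force: true}.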
function testAddWithSnapshot(secondariesDown) {
    const newdbpath = MongoRunner.dataPath + "newNode";
    jsTestLog("Making snapshot of primary");
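    // backupData() (from jstests/libs/backup_utils.js) copies the primary's data files into
    // 'newdbpath', most likely via a backup cursor on enterprise builds, giving the new node a
    // consistent point-in-time snapshot to start from.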
    backupData(primary, newdbpath);
    // Add some data after the backup.
    assert.commandWorked(
        testDb[testName].insert(
            {addWithSnapshotAfterSnapshot: secondariesDown, msg: "Data written after backup"},
            {writeConcern: {w: 1}},
        ),
    );
    let config = rst.getReplSetConfigFromNode();
    secondariesDown = secondariesDown || 0;
    disconnectSecondaries(secondariesDown);
    const useForce = secondariesDown > 1;
    if (useForce) {
        // Wait for the set to become unhealthy: with a majority unreachable, the primary steps
        // down to secondary.
        rst.awaitSecondaryNodes(null, [primary]);
    }
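    // 'noCleanData' together with the pre-populated 'dbpath' makes the new mongod start directly
    // from the snapshot files instead of performing an initial sync from scratch.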
    // Atlas always adds nodes with 0 votes and priority 0.
    const newNode = rst.add({rsConfig: {votes: 0, priority: 0}, noCleanData: true, dbpath: newdbpath});
    // The second disconnect ensures we can't reach the new node from the 'down' nodes.
    disconnectSecondaries(secondariesDown);
    const newConfig = rst.getReplSetConfig();
    config.members = newConfig.members;
    config.version += 1;
    jsTestLog("Reconfiguring set to add node.");
    assert.commandWorked(
        primary.adminCommand({replSetReconfig: config, maxTimeMS: ReplSetTest.kDefaultTimeoutMS, force: useForce}),
    );

jsTestLog("Waiting for node to sync.");
|
|
rst.awaitSecondaryNodes(null, [newNode]);
|
|
|
|
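    // Once the node has caught up, promote it to a voting member, as Atlas does. The new node is
    // the fourth entry (index 3) in the member array.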
jsTestLog("Reconfiguring added node to have votes");
|
|
config = rst.getReplSetConfigFromNode(primary.nodeId);
|
|
config.version += 1;
|
|
config.members[3].votes = 1;
|
|
assert.commandWorked(
|
|
primary.adminCommand({replSetReconfig: config, maxTimeMS: ReplSetTest.kDefaultTimeoutMS, force: useForce}),
|
|
);
|
|
if (!useForce) {
|
|
// Make sure we can replicate to it. This only works if the set was healthy, otherwise we
|
|
// can't.
|
|
assert.commandWorked(
|
|
testDb[testName].insert(
|
|
{addWithSnapshot: secondariesDown, msg: "Replicating write to new node"},
|
|
{writeConcern: {w: 1}},
|
|
),
|
|
);
|
|
rst.awaitReplication(undefined, undefined, [newNode]);
|
|
}
|
|
|
|
    // Make sure the set is still consistent after adding the node.
    reconnectSecondaries();
    // If we were in a majority-down scenario, wait for the primary to be re-elected.
    assert.soon(() => primary == rst.getPrimary());
    rst.checkOplogs();
    rst.checkReplicatedDataHashes();

    // Stabilize the cluster before removing the new node.
    assert.commandWorked(testDb[testName].insert({addWithSnapshot: secondariesDown, msg: "Reconnected secondaries"}));
    rst.awaitReplication();

    // Remove our extra node.
    rst.stop(newNode);
    rst.remove(newNode);
    rst.reInitiate();
    resetDbpath(newdbpath);
}

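// Simulates the Atlas workflow that re-images an existing member from a snapshot: back up the
// primary, stop the target node, swap its dbpath for the snapshot, and restart it so it resumes
// replication from the snapshot's point in time. 'secondariesDown' again controls how many
// secondaries are partitioned away.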
function testReplaceWithSnapshot(node, secondariesDown) {
    secondariesDown = secondariesDown || 0;
    const useForce = secondariesDown > 1;
    const replacedbpath = rst.getDbPath(node);
    const backupdbpath = replacedbpath + "_bak";
    let config = rst.getReplSetConfigFromNode();
    jsTestLog("Backing up the primary node");
    backupData(primary, backupdbpath);
    // Add some data after the backup.
    assert.commandWorked(testDb[testName].insert({replaceWithSnapshot: secondariesDown}, {writeConcern: {w: 1}}));
    disconnectSecondaries(secondariesDown);
    if (useForce) {
        // Wait for the set to become unhealthy.
        rst.awaitSecondaryNodes(null, [primary]);
    }
    jsTestLog("Stopping node for replacement of data");
    rst.stop(node, undefined, undefined, {forRestart: true});

jsTestLog("Replacing node data with snapshot");
|
|
copyDbpath(backupdbpath, replacedbpath);
|
|
resetDbpath(backupdbpath);
|
|
jsTestLog("Restarting replacement node.");
|
|
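    // Passing restart=true makes rst.start() reuse the existing (now snapshot-seeded) dbpath
    // rather than wiping it before startup.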
    rst.start(node, undefined, true /* restart */);
    // We can't use awaitSecondaryNodes because the set might not be healthy. Poll with
    // soonNoExcept since commands may fail while the node is still starting up.
    assert.soonNoExcept(() => node.adminCommand({isMaster: 1}).secondary);
    if (!useForce) {
        // Make sure we can replicate to it, if the set is otherwise healthy.
        rst.awaitReplication(undefined, undefined, [node]);
    }
    // Make sure the set is still consistent after resyncing the node.
    reconnectSecondaries();
    // If we were in a majority-down scenario, wait for the primary to be re-elected.
    assert.soon(() => primary == rst.getPrimary());
    rst.checkOplogs();
    rst.checkReplicatedDataHashes();
}

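// Run each workflow in three scenarios: a healthy set, one secondary unreachable, and two
// secondaries unreachable (majority down, which forces the reconfigs to use {force: true}).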
jsTestLog("Test adding a node with snapshot in a healthy system.");
|
|
testAddWithSnapshot(0);
|
|
|
|
jsTestLog("Test adding a node with snapshot with one secondary unreachable.");
|
|
testAddWithSnapshot(1);
|
|
|
|
jsTestLog("Test adding a node with snapshot with two secondaries unreachable.");
|
|
testAddWithSnapshot(2);
|
|
|
|
jsTestLog("Adding node for replace-node scenarios");
|
|
let newNode = rst.add({rsConfig: {priority: 0}});
|
|
rst.reInitiate();
|
|
|
|
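// The replace-node tests reuse this fourth, priority-0 member as the node whose data files get
// swapped out for a fresh snapshot of the primary.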
jsTestLog("Test replacing a node with snapshot in a healthy system.");
|
|
testReplaceWithSnapshot(newNode, 0);
|
|
|
|
jsTestLog("Test replacing a node with snapshot with one secondary unreachable.");
|
|
testReplaceWithSnapshot(newNode, 1);
|
|
|
|
jsTestLog("Test replacing a node with snapshot with two secondaries unreachable.");
|
|
testReplaceWithSnapshot(newNode, 2);
|
|
|
|
rst.stopSet();
|