/**
 * Tests that initial sync will abort an attempt if the sync source restarts from an unclean
 * shutdown, and that the sync source node increments its rollback id after the unclean shutdown.
 *
 * This is to test resumable initial sync behavior when the sync source restarts after an unclean
 * shutdown. See SERVER-50140 for more details.
 * @tags: [requires_persistence]
 */
import {configureFailPoint, kDefaultWaitForFailPointTimeout} from "jstests/libs/fail_point_util.js";
import {ReplSetTest} from "jstests/libs/replsettest.js";

const dbName = "test";
const collName = "coll";

const rst = new ReplSetTest({nodes: 1});
rst.startSet();
rst.initiate(null, null, {initiateWithDefaultElectionTimeout: true});

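// The primary of this single-node replica set will serve as the sync source for the
// initial syncing node added below.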
let syncSourceNode = rst.getPrimary();
const syncSourceColl = syncSourceNode.getDB(dbName)[collName];

// Insert some initial data to be cloned.
assert.commandWorked(syncSourceColl.insert([{_id: 1}, {_id: 2}, {_id: 3}]));

jsTest.log("Adding a new node to the replica set");
const initialSyncNode = rst.add({
    rsConfig: {priority: 0, votes: 0},
    setParameter: {
        "failpoint.initialSyncHangBeforeCopyingDatabases": tojson({mode: "alwaysOn"}),
        // Wait for the cloners to finish.
        "failpoint.initialSyncHangAfterDataCloning": tojson({mode: "alwaysOn"}),
        "numInitialSyncAttempts": 1,
    },
});
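// reInitiate() reconfigures the replica set to include the newly added node, which then starts
// initial sync from the primary.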
rst.reInitiate();

jsTestLog("The initialSyncNode should hang before the database cloning phase");
checkLog.contains(initialSyncNode, "initialSyncHangBeforeCopyingDatabases fail point enabled");

// Pause the journal flusher and write with {j: false}, so this data will be lost when the
// syncSourceNode restarts after an unclean shutdown.
const journalFp = configureFailPoint(syncSourceNode, "pauseJournalFlusherThread");
journalFp.wait();
assert.commandWorked(syncSourceColl.insert({_id: 4}));

// Hang the initialSyncNode before initial sync finishes so we can check initial sync failure.
const beforeFinishFailPoint = configureFailPoint(initialSyncNode, "initialSyncHangBeforeFinish");

jsTestLog("Resuming database cloner on the initialSyncNode");
assert.commandWorked(initialSyncNode.adminCommand(
    {configureFailPoint: "initialSyncHangBeforeCopyingDatabases", mode: "off"}));

jsTestLog("Waiting for data cloning to complete on the initialSyncNode");
assert.commandWorked(initialSyncNode.adminCommand({
    waitForFailPoint: "initialSyncHangAfterDataCloning",
    timesEntered: 1,
    maxTimeMS: kDefaultWaitForFailPointTimeout,
}));

// Get the rollback id of the sync source before the unclean shutdown.
const rollbackIdBefore = syncSourceNode.getDB("local").system.rollback.id.findOne();

jsTestLog("Shutting down the syncSourceNode uncleanly");
rst.stop(syncSourceNode, 9, {allowedExitCode: MongoRunner.EXIT_SIGKILL}, {forRestart: true, waitPid: true});

// Make sure some retries happen due to resumable initial sync and that the initial sync does not
// immediately fail while the sync source is completely down.
const nRetries = 2;
checkLog.containsWithAtLeastCount(initialSyncNode, "Trying to reconnect", nRetries);

// Restart the sync source and wait for it to become primary again.
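// Starting with restart=true reuses the node's dbpath, so the node recovers from the data files
// left behind by the unclean shutdown.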
jsTestLog("Restarting the syncSourceNode");
rst.start(syncSourceNode, {waitForConnect: true}, true /* restart */);
syncSourceNode = rst.getPrimary();

// Test that the rollback id is incremented after the unclean shutdown.
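// This increment is what allows a resuming initial sync attempt to detect that data on the
// sync source may have been lost (see SERVER-50140).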
const rollbackIdAfter = syncSourceNode.getDB("local").system.rollback.id.findOne();
assert.eq(rollbackIdAfter.rollbackId,
          rollbackIdBefore.rollbackId + 1,
          () => "rollbackIdBefore: " + tojson(rollbackIdBefore) +
              " rollbackIdAfter: " + tojson(rollbackIdAfter));

jsTestLog("Resuming initial sync after the data cloning phase on the initialSyncNode");
assert.commandWorked(initialSyncNode.adminCommand(
    {configureFailPoint: "initialSyncHangAfterDataCloning", mode: "off"}));

jsTestLog("Waiting for initial sync to fail on the initialSyncNode");
beforeFinishFailPoint.wait();
const res = assert.commandWorked(initialSyncNode.adminCommand({replSetGetStatus: 1}));
// The initial sync should have failed.
assert.eq(res.initialSyncStatus.failedInitialSyncAttempts, 1, () => tojson(res.initialSyncStatus));
beforeFinishFailPoint.off();

// Get rid of the failed node so the fixture can stop properly. We expect it to stop with an
// fassert, since "numInitialSyncAttempts" is set to 1 and the single attempt has failed.
assert.eq(MongoRunner.EXIT_ABRUPT, waitMongoProgram(initialSyncNode.port));
rst.remove(initialSyncNode);

rst.stopSet();