mirror of https://github.com/mongodb/mongo
SERVER-103955 Primary only transitions to commitQuorumSatisified after itself has voted (#45227)
GitOrigin-RevId: 545fb6457f156a096fb804f73bcde18b8fc73fc7
This commit is contained in:
parent
89cabc7dbd
commit
9bc3459ce8
|
|
@ -105,6 +105,8 @@ last-continuous:
|
|||
ticket: SERVER-86326
|
||||
- test_file: jstests/core/query/boolean_simplifier_stress.js
|
||||
ticket: SERVER-114126
|
||||
- test_file: jstests/replsets/rollback_index_build_start_abort.js
|
||||
ticket: SERVER-103955
|
||||
suites: null
|
||||
last-lts:
|
||||
all:
|
||||
|
|
@ -712,4 +714,6 @@ last-lts:
|
|||
ticket: SERVER-86326
|
||||
- test_file: jstests/core/query/boolean_simplifier_stress.js
|
||||
ticket: SERVER-114126
|
||||
- test_file: jstests/replsets/rollback_index_build_start_abort.js
|
||||
ticket: SERVER-103955
|
||||
suites: null
|
||||
|
|
|
|||
|
|
@ -0,0 +1,68 @@
|
|||
/**
|
||||
* Tests that even when enough secondaries have voted to commit an index build, the primary does
|
||||
* not consider commit quorum satisfied if itself has not completed. The index build can still
|
||||
* be aborted on the primary instead of hanging indefinitely.
|
||||
*
|
||||
* @tags: [
|
||||
* requires_commit_quorum,
|
||||
* requires_replication,
|
||||
* ]
|
||||
*/
|
||||
import {configureFailPoint} from "jstests/libs/fail_point_util.js";
|
||||
import {ReplSetTest} from "jstests/libs/replsettest.js";
|
||||
import {IndexBuildTest} from "jstests/noPassthrough/libs/index_builds/index_build.js";
|
||||
|
||||
const rst = new ReplSetTest({nodes: 2});
|
||||
rst.startSet();
|
||||
rst.initiate();
|
||||
|
||||
const primary = rst.getPrimary();
|
||||
const testDB = primary.getDB("test");
|
||||
const coll = testDB.getCollection("test");
|
||||
|
||||
assert.commandWorked(coll.insert({a: 1}));
|
||||
|
||||
const secondary = rst.getSecondary();
|
||||
const secondaryDB = secondary.getDB(testDB.getName());
|
||||
const secondaryColl = secondaryDB.getCollection(coll.getName());
|
||||
|
||||
// Pause primary index build after starting.
|
||||
IndexBuildTest.pauseIndexBuilds(primary);
|
||||
// Pause secondary index build after voting for commit.
|
||||
const hangAfterVoteCommit = configureFailPoint(secondaryDB, "hangIndexBuildAfterSignalPrimaryForCommitReadiness");
|
||||
|
||||
jsTest.log.info("Waiting for index build to start");
|
||||
const createIdx = IndexBuildTest.startIndexBuild(
|
||||
primary,
|
||||
coll.getFullName(),
|
||||
{a: 1},
|
||||
null,
|
||||
/* expectedFailures */ [ErrorCodes.Interrupted],
|
||||
/* commitQuorum */ 1,
|
||||
);
|
||||
|
||||
// Wait for the index build to start on both nodes.
|
||||
const opId = IndexBuildTest.waitForIndexBuildToStart(testDB, coll.getName(), "a_1");
|
||||
IndexBuildTest.assertIndexBuildCurrentOpContents(testDB, opId);
|
||||
const secondaryOpId = IndexBuildTest.waitForIndexBuildToStart(secondaryDB, coll.getName(), "a_1");
|
||||
IndexBuildTest.assertIndexBuildCurrentOpContents(secondaryDB, secondaryOpId);
|
||||
|
||||
jsTest.log.info("Waiting for secondary to vote to commit the index");
|
||||
hangAfterVoteCommit.wait();
|
||||
IndexBuildTest.assertIndexesSoon(secondaryColl, 2, ["_id_", "a_1"]);
|
||||
|
||||
// Primary should not consider commit quorum satisfied and still allow to abort.
|
||||
IndexBuildTest.assertIndexesSoon(coll, 2, ["_id_", "a_1"]);
|
||||
testDB.killOp(opId);
|
||||
|
||||
jsTest.log.info("Waiting for index build to stop");
|
||||
IndexBuildTest.waitForIndexBuildToStop(testDB);
|
||||
IndexBuildTest.assertIndexesSoon(coll, 1, ["_id_"]);
|
||||
|
||||
IndexBuildTest.waitForIndexBuildToStop(secondaryDB);
|
||||
IndexBuildTest.assertIndexesSoon(secondaryColl, 1, ["_id_"]);
|
||||
|
||||
const exitCode = createIdx();
|
||||
assert.eq(0, exitCode, "expected shell to exit successfully");
|
||||
|
||||
rst.stopSet();
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
/**
|
||||
* Tests that if secondaries have voted but the primary has not, and if a secondary steps up and sees
|
||||
* that commit quorum is satisfied, it proceeds to commit the index build.
|
||||
*
|
||||
* @tags: [
|
||||
* requires_commit_quorum,
|
||||
* requires_replication,
|
||||
* ]
|
||||
*/
|
||||
import {configureFailPoint} from "jstests/libs/fail_point_util.js";
|
||||
import {ReplSetTest} from "jstests/libs/replsettest.js";
|
||||
import {IndexBuildTest} from "jstests/noPassthrough/libs/index_builds/index_build.js";
|
||||
|
||||
const rst = new ReplSetTest({nodes: 2});
|
||||
rst.startSet();
|
||||
rst.initiate();
|
||||
|
||||
const primary = rst.getPrimary();
|
||||
const testDB = primary.getDB("test");
|
||||
const coll = testDB.getCollection("test");
|
||||
|
||||
assert.commandWorked(coll.insert({a: 1}));
|
||||
|
||||
const secondary = rst.getSecondary();
|
||||
const secondaryDB = secondary.getDB(testDB.getName());
|
||||
const secondaryColl = secondaryDB.getCollection(coll.getName());
|
||||
|
||||
// Pause primary index build after starting.
|
||||
IndexBuildTest.pauseIndexBuilds(primary);
|
||||
|
||||
jsTest.log.info("Waiting for index build to start");
|
||||
const createIdx = IndexBuildTest.startIndexBuild(
|
||||
primary,
|
||||
coll.getFullName(),
|
||||
{a: 1},
|
||||
null,
|
||||
/* expectedFailures */ [ErrorCodes.InterruptedDueToReplStateChange],
|
||||
/* commitQuorum */ 1,
|
||||
);
|
||||
|
||||
// Wait for the index build to start on both nodes.
|
||||
const opId = IndexBuildTest.waitForIndexBuildToStart(testDB, coll.getName(), "a_1");
|
||||
IndexBuildTest.assertIndexBuildCurrentOpContents(testDB, opId);
|
||||
IndexBuildTest.assertIndexesSoon(coll, 2, ["_id_", "a_1"]);
|
||||
|
||||
const secondaryOpId = IndexBuildTest.waitForIndexBuildToStart(secondaryDB, coll.getName(), "a_1");
|
||||
IndexBuildTest.assertIndexBuildCurrentOpContents(secondaryDB, secondaryOpId);
|
||||
IndexBuildTest.assertIndexesSoon(secondaryColl, 2, ["_id_", "a_1"]);
|
||||
|
||||
// Before stepping down primary, make sure secondary pauses on step up.
|
||||
const hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum = configureFailPoint(
|
||||
secondaryDB,
|
||||
"hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum",
|
||||
);
|
||||
|
||||
jsTest.log.info("Waiting for primary to step down");
|
||||
rst.awaitReplication();
|
||||
const stepDown = startParallelShell(() => {
|
||||
assert.commandWorked(db.adminCommand({"replSetStepDown": 60, "force": false}));
|
||||
}, primary.port);
|
||||
// Wait for stepdown to complete.
|
||||
stepDown();
|
||||
|
||||
// The index build on old primary will continue in the background.
|
||||
const exitCode = createIdx();
|
||||
assert.eq(0, exitCode, "expected shell to exit successfully");
|
||||
|
||||
jsTest.log.info("Waiting for secondary to step up and satisfy commit quorum as new primary");
|
||||
hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum.wait();
|
||||
|
||||
// Resume index builds on both nodes.
|
||||
IndexBuildTest.resumeIndexBuilds(primary);
|
||||
hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum.off();
|
||||
|
||||
jsTest.log.info("Waiting for index build to stop");
|
||||
IndexBuildTest.waitForIndexBuildToStop(testDB);
|
||||
IndexBuildTest.waitForIndexBuildToStop(secondaryDB);
|
||||
|
||||
// Expect "Index build: completed successfully" in the log.
|
||||
checkLog.containsJson(primary, 20663, {
|
||||
namespace: coll.getFullName(),
|
||||
indexesBuilt: ["a_1"],
|
||||
numIndexesAfter: 2,
|
||||
});
|
||||
checkLog.containsJson(secondary, 20663, {
|
||||
namespace: coll.getFullName(),
|
||||
indexesBuilt: ["a_1"],
|
||||
numIndexesAfter: 2,
|
||||
});
|
||||
|
||||
rst.stopSet();
|
||||
|
|
@ -111,7 +111,7 @@ export class RollbackIndexBuildsTest {
|
|||
|
||||
var errcodes = self.expectedErrors ? self.expectedErrors : [];
|
||||
// This test creates indexes with majority of nodes not available for
|
||||
// replication. So, disabling index build commit quorum.
|
||||
// replication, so set index build commit quorum to 1.
|
||||
indexBuilds.push(
|
||||
IndexBuildTest.startIndexBuild(
|
||||
primary,
|
||||
|
|
@ -119,7 +119,7 @@ export class RollbackIndexBuildsTest {
|
|||
indexSpec,
|
||||
{},
|
||||
errcodes,
|
||||
0,
|
||||
1,
|
||||
),
|
||||
);
|
||||
|
||||
|
|
|
|||
|
|
@ -1593,7 +1593,7 @@ bool IndexBuildsCoordinator::abortIndexBuildByBuildUUID(OperationContext* opCtx,
|
|||
gFeatureFlagIntentRegistration.isEnabled());
|
||||
|
||||
// Override the 'signalAction' as this is an initial syncing node.
|
||||
// Don't override it if it's a rollback abort which would be explictly requested
|
||||
// Don't override it if it's a rollback abort which would be explicitly requested
|
||||
// by the initial sync code.
|
||||
auto replCoord = repl::ReplicationCoordinator::get(opCtx);
|
||||
if (replCoord->getMemberState().startup2() &&
|
||||
|
|
|
|||
|
|
@ -680,7 +680,7 @@ protected:
|
|||
/**
|
||||
* Runs the index build on the caller thread. Handles unregistering the index build and setting
|
||||
* the index build's Promise with the outcome of the index build.
|
||||
* 'IndexBuildOptios::replSetAndNotPrimary' is determined at the start of the index build.
|
||||
* 'IndexBuildOptions::replSetAndNotPrimary' is determined at the start of the index build.
|
||||
*/
|
||||
void _runIndexBuild(OperationContext* opCtx,
|
||||
const UUID& buildUUID,
|
||||
|
|
@ -809,7 +809,7 @@ protected:
|
|||
/**
|
||||
* Attempt to signal the index build to commit and advance the index build to the
|
||||
* kApplyCommitOplogEntry state. Returns true if successful and false if the attempt was
|
||||
* unnecessful and the caller should retry.
|
||||
* unsuccessful and the caller should retry.
|
||||
*/
|
||||
bool _tryCommit(OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState);
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -678,7 +678,14 @@ bool IndexBuildsCoordinatorMongod::_signalIfCommitQuorumIsSatisfied(
|
|||
if (!voteMemberList)
|
||||
return false;
|
||||
|
||||
bool commitQuorumSatisfied = repl::ReplicationCoordinator::get(opCtx)->isCommitQuorumSatisfied(
|
||||
const auto replCoord = repl::ReplicationCoordinator::get(opCtx);
|
||||
if (std::find(voteMemberList->begin(), voteMemberList->end(), replCoord->getMyHostAndPort()) ==
|
||||
voteMemberList->end()) {
|
||||
// Only after primary has committed can we proceed to check for commit quorum satisfied.
|
||||
return false;
|
||||
}
|
||||
|
||||
bool commitQuorumSatisfied = replCoord->isCommitQuorumSatisfied(
|
||||
indexBuildEntry.getCommitQuorum(), voteMemberList.value());
|
||||
|
||||
if (!commitQuorumSatisfied)
|
||||
|
|
|
|||
|
|
@ -394,7 +394,9 @@ TEST_F(IndexBuildsCoordinatorMongodTest, SetCommitQuorumFailsToTurnCommitQuorumF
|
|||
ASSERT_EQUALS(ErrorCodes::BadValue, status);
|
||||
|
||||
ASSERT_OK(_indexBuildsCoord->voteCommitIndexBuild(
|
||||
operationContext(), buildUUID, HostAndPort("test1", 1234)));
|
||||
operationContext(),
|
||||
buildUUID,
|
||||
repl::ReplicationCoordinator::get(operationContext())->getMyHostAndPort()));
|
||||
|
||||
assertGet(testFoo1Future.getNoThrow());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -418,7 +418,8 @@ int ReplicationCoordinatorMock::getMyId() const {
|
|||
}
|
||||
|
||||
HostAndPort ReplicationCoordinatorMock::getMyHostAndPort() const {
|
||||
return HostAndPort();
|
||||
// Set to a non-empty value to satisfy the deserializer parser.
|
||||
return HostAndPort("test1", 1234);
|
||||
}
|
||||
|
||||
boost::optional<int> ReplicationCoordinatorMock::getMyMaintenancePort() const {
|
||||
|
|
|
|||
Loading…
Reference in New Issue