SERVER-103955 Primary only transitions to commitQuorumSatisfied after itself has voted (#45227)

GitOrigin-RevId: 545fb6457f156a096fb804f73bcde18b8fc73fc7
This commit is contained in:
Wei Hu 2025-12-15 09:55:37 -08:00 committed by MongoDB Bot
parent 89cabc7dbd
commit 9bc3459ce8
9 changed files with 181 additions and 8 deletions

View File

@ -105,6 +105,8 @@ last-continuous:
ticket: SERVER-86326 ticket: SERVER-86326
- test_file: jstests/core/query/boolean_simplifier_stress.js - test_file: jstests/core/query/boolean_simplifier_stress.js
ticket: SERVER-114126 ticket: SERVER-114126
- test_file: jstests/replsets/rollback_index_build_start_abort.js
ticket: SERVER-103955
suites: null suites: null
last-lts: last-lts:
all: all:
@ -712,4 +714,6 @@ last-lts:
ticket: SERVER-86326 ticket: SERVER-86326
- test_file: jstests/core/query/boolean_simplifier_stress.js - test_file: jstests/core/query/boolean_simplifier_stress.js
ticket: SERVER-114126 ticket: SERVER-114126
- test_file: jstests/replsets/rollback_index_build_start_abort.js
ticket: SERVER-103955
suites: null suites: null

View File

@ -0,0 +1,68 @@
/**
* Tests that even when enough secondaries have voted to commit an index build, the primary does
* not consider the commit quorum satisfied while its own build has not completed. The index
* build can therefore still be aborted on the primary instead of hanging indefinitely.
*
* @tags: [
* requires_commit_quorum,
* requires_replication,
* ]
*/
import {configureFailPoint} from "jstests/libs/fail_point_util.js";
import {ReplSetTest} from "jstests/libs/replsettest.js";
import {IndexBuildTest} from "jstests/noPassthrough/libs/index_builds/index_build.js";
// Bring up a two-node replica set and seed the test collection with a single document so there
// is data for the index build to scan.
const rst = new ReplSetTest({nodes: 2});
rst.startSet();
rst.initiate();
const primary = rst.getPrimary();
const testDB = primary.getDB("test");
const coll = testDB.getCollection("test");
assert.commandWorked(coll.insert({a: 1}));
// Handles to the same database/collection as seen through the secondary's connection.
const secondary = rst.getSecondary();
const secondaryDB = secondary.getDB(testDB.getName());
const secondaryColl = secondaryDB.getCollection(coll.getName());
// Pause primary index build after starting, so the primary cannot finish (and vote) on its own.
IndexBuildTest.pauseIndexBuilds(primary);
// Pause secondary index build after voting for commit. The fail point is enabled before the
// build starts so the secondary is guaranteed to stop there once it has signalled the primary.
const hangAfterVoteCommit = configureFailPoint(secondaryDB, "hangIndexBuildAfterSignalPrimaryForCommitReadiness");
jsTest.log.info("Waiting for index build to start");
// Start building {a: 1} with a commit quorum of 1. The build is expected to fail with
// Interrupted because it is killed on the primary further down.
// NOTE(review): startIndexBuild presumably runs createIndexes in a parallel shell and returns a
// join function (createIdx() is joined for an exit code at the end) -- confirm against the helper.
const createIdx = IndexBuildTest.startIndexBuild(
primary,
coll.getFullName(),
{a: 1},
null,
/* expectedFailures */ [ErrorCodes.Interrupted],
/* commitQuorum */ 1,
);
// Wait for the index build to start on both nodes.
const opId = IndexBuildTest.waitForIndexBuildToStart(testDB, coll.getName(), "a_1");
IndexBuildTest.assertIndexBuildCurrentOpContents(testDB, opId);
const secondaryOpId = IndexBuildTest.waitForIndexBuildToStart(secondaryDB, coll.getName(), "a_1");
IndexBuildTest.assertIndexBuildCurrentOpContents(secondaryDB, secondaryOpId);
jsTest.log.info("Waiting for secondary to vote to commit the index");
// Block until the secondary has reached the fail point, i.e. it has already signalled the
// primary for commit readiness.
hangAfterVoteCommit.wait();
// Both the _id index and the a_1 index should be listed on the secondary at this point.
IndexBuildTest.assertIndexesSoon(secondaryColl, 2, ["_id_", "a_1"]);
// Primary should not consider commit quorum satisfied and still allow to abort: the a_1 index is
// still listed on the primary, and the build operation can still be killed.
IndexBuildTest.assertIndexesSoon(coll, 2, ["_id_", "a_1"]);
testDB.killOp(opId);
jsTest.log.info("Waiting for index build to stop");
// After the kill, the build should stop on both nodes and only the _id index should remain.
IndexBuildTest.waitForIndexBuildToStop(testDB);
IndexBuildTest.assertIndexesSoon(coll, 1, ["_id_"]);
IndexBuildTest.waitForIndexBuildToStop(secondaryDB);
IndexBuildTest.assertIndexesSoon(secondaryColl, 1, ["_id_"]);
// Join the index build started above; Interrupted was declared as an expected failure, so the
// shell is still expected to exit with code 0.
const exitCode = createIdx();
assert.eq(0, exitCode, "expected shell to exit successfully");
rst.stopSet();

View File

@ -0,0 +1,91 @@
/**
* Tests that when secondaries have voted but the primary has not, a secondary that steps up and
* sees that the commit quorum is satisfied proceeds to commit the index build.
*
* @tags: [
* requires_commit_quorum,
* requires_replication,
* ]
*/
import {configureFailPoint} from "jstests/libs/fail_point_util.js";
import {ReplSetTest} from "jstests/libs/replsettest.js";
import {IndexBuildTest} from "jstests/noPassthrough/libs/index_builds/index_build.js";
// Bring up a two-node replica set and seed the test collection with a single document so there
// is data for the index build to scan.
const rst = new ReplSetTest({nodes: 2});
rst.startSet();
rst.initiate();
const primary = rst.getPrimary();
const testDB = primary.getDB("test");
const coll = testDB.getCollection("test");
assert.commandWorked(coll.insert({a: 1}));
// Handles to the same database/collection as seen through the secondary's connection.
// Note: 'primary' and 'secondary' keep referring to the nodes' ORIGINAL roles for the rest of the
// test, even after the step down below.
const secondary = rst.getSecondary();
const secondaryDB = secondary.getDB(testDB.getName());
const secondaryColl = secondaryDB.getCollection(coll.getName());
// Pause primary index build after starting, so the primary cannot finish (and vote) on its own.
IndexBuildTest.pauseIndexBuilds(primary);
jsTest.log.info("Waiting for index build to start");
// Start building {a: 1} with a commit quorum of 1. The command is expected to fail with
// InterruptedDueToReplStateChange because the primary is stepped down further down.
// NOTE(review): startIndexBuild presumably runs createIndexes in a parallel shell and returns a
// join function (createIdx() is joined for an exit code below) -- confirm against the helper.
const createIdx = IndexBuildTest.startIndexBuild(
primary,
coll.getFullName(),
{a: 1},
null,
/* expectedFailures */ [ErrorCodes.InterruptedDueToReplStateChange],
/* commitQuorum */ 1,
);
// Wait for the index build to start on both nodes, and check that both list the a_1 index
// alongside the _id index.
const opId = IndexBuildTest.waitForIndexBuildToStart(testDB, coll.getName(), "a_1");
IndexBuildTest.assertIndexBuildCurrentOpContents(testDB, opId);
IndexBuildTest.assertIndexesSoon(coll, 2, ["_id_", "a_1"]);
const secondaryOpId = IndexBuildTest.waitForIndexBuildToStart(secondaryDB, coll.getName(), "a_1");
IndexBuildTest.assertIndexBuildCurrentOpContents(secondaryDB, secondaryOpId);
IndexBuildTest.assertIndexesSoon(secondaryColl, 2, ["_id_", "a_1"]);
// Before stepping down primary, make sure secondary pauses on step up.
const hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum = configureFailPoint(
secondaryDB,
"hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum",
);
jsTest.log.info("Waiting for primary to step down");
// Make sure the secondary is caught up before asking the primary for a non-forced step down.
rst.awaitReplication();
// Issue replSetStepDown against the original primary from a separate shell.
const stepDown = startParallelShell(() => {
assert.commandWorked(db.adminCommand({"replSetStepDown": 60, "force": false}));
}, primary.port);
// Wait for stepdown to complete.
stepDown();
// The index build on old primary will continue in the background. Only the client command is
// joined here; InterruptedDueToReplStateChange was declared as an expected failure above, so the
// shell is still expected to exit with code 0.
const exitCode = createIdx();
assert.eq(0, exitCode, "expected shell to exit successfully");
jsTest.log.info("Waiting for secondary to step up and satisfy commit quorum as new primary");
// Block until the original secondary has reached the step-up fail point.
hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum.wait();
// Resume index builds on both nodes: un-pause the build on the old primary and release the
// step-up fail point on the original secondary.
IndexBuildTest.resumeIndexBuilds(primary);
hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum.off();
jsTest.log.info("Waiting for index build to stop");
IndexBuildTest.waitForIndexBuildToStop(testDB);
IndexBuildTest.waitForIndexBuildToStop(secondaryDB);
// Expect "Index build: completed successfully" in the log of both nodes, reporting that a_1 was
// built and that the collection now has two indexes.
checkLog.containsJson(primary, 20663, {
namespace: coll.getFullName(),
indexesBuilt: ["a_1"],
numIndexesAfter: 2,
});
checkLog.containsJson(secondary, 20663, {
namespace: coll.getFullName(),
indexesBuilt: ["a_1"],
numIndexesAfter: 2,
});
rst.stopSet();

View File

@ -111,7 +111,7 @@ export class RollbackIndexBuildsTest {
var errcodes = self.expectedErrors ? self.expectedErrors : []; var errcodes = self.expectedErrors ? self.expectedErrors : [];
// This test creates indexes with majority of nodes not available for // This test creates indexes with majority of nodes not available for
// replication. So, disabling index build commit quorum. // replication, so set index build commit quorum to 1.
indexBuilds.push( indexBuilds.push(
IndexBuildTest.startIndexBuild( IndexBuildTest.startIndexBuild(
primary, primary,
@ -119,7 +119,7 @@ export class RollbackIndexBuildsTest {
indexSpec, indexSpec,
{}, {},
errcodes, errcodes,
0, 1,
), ),
); );

View File

@ -1593,7 +1593,7 @@ bool IndexBuildsCoordinator::abortIndexBuildByBuildUUID(OperationContext* opCtx,
gFeatureFlagIntentRegistration.isEnabled()); gFeatureFlagIntentRegistration.isEnabled());
// Override the 'signalAction' as this is an initial syncing node. // Override the 'signalAction' as this is an initial syncing node.
// Don't override it if it's a rollback abort which would be explictly requested // Don't override it if it's a rollback abort which would be explicitly requested
// by the initial sync code. // by the initial sync code.
auto replCoord = repl::ReplicationCoordinator::get(opCtx); auto replCoord = repl::ReplicationCoordinator::get(opCtx);
if (replCoord->getMemberState().startup2() && if (replCoord->getMemberState().startup2() &&

View File

@ -680,7 +680,7 @@ protected:
/** /**
* Runs the index build on the caller thread. Handles unregistering the index build and setting * Runs the index build on the caller thread. Handles unregistering the index build and setting
* the index build's Promise with the outcome of the index build. * the index build's Promise with the outcome of the index build.
* 'IndexBuildOptios::replSetAndNotPrimary' is determined at the start of the index build. * 'IndexBuildOptions::replSetAndNotPrimary' is determined at the start of the index build.
*/ */
void _runIndexBuild(OperationContext* opCtx, void _runIndexBuild(OperationContext* opCtx,
const UUID& buildUUID, const UUID& buildUUID,
@ -809,7 +809,7 @@ protected:
/** /**
* Attempt to signal the index build to commit and advance the index build to the * Attempt to signal the index build to commit and advance the index build to the
* kApplyCommitOplogEntry state. Returns true if successful and false if the attempt was * kApplyCommitOplogEntry state. Returns true if successful and false if the attempt was
* unnecessful and the caller should retry. * unsuccessful and the caller should retry.
*/ */
bool _tryCommit(OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState); bool _tryCommit(OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState);
/** /**

View File

@ -678,7 +678,14 @@ bool IndexBuildsCoordinatorMongod::_signalIfCommitQuorumIsSatisfied(
if (!voteMemberList) if (!voteMemberList)
return false; return false;
bool commitQuorumSatisfied = repl::ReplicationCoordinator::get(opCtx)->isCommitQuorumSatisfied( const auto replCoord = repl::ReplicationCoordinator::get(opCtx);
if (std::find(voteMemberList->begin(), voteMemberList->end(), replCoord->getMyHostAndPort()) ==
voteMemberList->end()) {
// Only after the primary itself has voted can we proceed to check whether commit quorum is satisfied.
return false;
}
bool commitQuorumSatisfied = replCoord->isCommitQuorumSatisfied(
indexBuildEntry.getCommitQuorum(), voteMemberList.value()); indexBuildEntry.getCommitQuorum(), voteMemberList.value());
if (!commitQuorumSatisfied) if (!commitQuorumSatisfied)

View File

@ -394,7 +394,9 @@ TEST_F(IndexBuildsCoordinatorMongodTest, SetCommitQuorumFailsToTurnCommitQuorumF
ASSERT_EQUALS(ErrorCodes::BadValue, status); ASSERT_EQUALS(ErrorCodes::BadValue, status);
ASSERT_OK(_indexBuildsCoord->voteCommitIndexBuild( ASSERT_OK(_indexBuildsCoord->voteCommitIndexBuild(
operationContext(), buildUUID, HostAndPort("test1", 1234))); operationContext(),
buildUUID,
repl::ReplicationCoordinator::get(operationContext())->getMyHostAndPort()));
assertGet(testFoo1Future.getNoThrow()); assertGet(testFoo1Future.getNoThrow());
} }

View File

@ -418,7 +418,8 @@ int ReplicationCoordinatorMock::getMyId() const {
} }
HostAndPort ReplicationCoordinatorMock::getMyHostAndPort() const { HostAndPort ReplicationCoordinatorMock::getMyHostAndPort() const {
return HostAndPort(); // Set to a non-empty value to satisfy the deserializer parser.
return HostAndPort("test1", 1234);
} }
boost::optional<int> ReplicationCoordinatorMock::getMyMaintenancePort() const { boost::optional<int> ReplicationCoordinatorMock::getMyMaintenancePort() const {