diff --git a/etc/backports_required_for_multiversion_tests.yml b/etc/backports_required_for_multiversion_tests.yml index 3b7d9cc359c..fe3127e9a67 100644 --- a/etc/backports_required_for_multiversion_tests.yml +++ b/etc/backports_required_for_multiversion_tests.yml @@ -105,6 +105,8 @@ last-continuous: ticket: SERVER-86326 - test_file: jstests/core/query/boolean_simplifier_stress.js ticket: SERVER-114126 + - test_file: jstests/replsets/rollback_index_build_start_abort.js + ticket: SERVER-103955 suites: null last-lts: all: @@ -712,4 +714,6 @@ last-lts: ticket: SERVER-86326 - test_file: jstests/core/query/boolean_simplifier_stress.js ticket: SERVER-114126 + - test_file: jstests/replsets/rollback_index_build_start_abort.js + ticket: SERVER-103955 suites: null diff --git a/jstests/noPassthrough/index_builds/primary_can_abort_after_secondaries_voted.js b/jstests/noPassthrough/index_builds/primary_can_abort_after_secondaries_voted.js new file mode 100644 index 00000000000..76ba298581c --- /dev/null +++ b/jstests/noPassthrough/index_builds/primary_can_abort_after_secondaries_voted.js @@ -0,0 +1,68 @@ +/** + * Tests that even when enough secondaries have voted to commit an index build, the primary does + * not consider commit quorum satisfied if itself has not completed. The index build can still + * be aborted on the primary instead of hanging indefinitely. + * + * @tags: [ + * requires_commit_quorum, + * requires_replication, + * ] + */ +import {configureFailPoint} from "jstests/libs/fail_point_util.js"; +import {ReplSetTest} from "jstests/libs/replsettest.js"; +import {IndexBuildTest} from "jstests/noPassthrough/libs/index_builds/index_build.js"; + +const rst = new ReplSetTest({nodes: 2}); +rst.startSet(); +rst.initiate(); + +const primary = rst.getPrimary(); +const testDB = primary.getDB("test"); +const coll = testDB.getCollection("test"); + +assert.commandWorked(coll.insert({a: 1})); + +const secondary = rst.getSecondary(); +const secondaryDB = secondary.getDB(testDB.getName()); +const secondaryColl = secondaryDB.getCollection(coll.getName()); + +// Pause primary index build after starting. +IndexBuildTest.pauseIndexBuilds(primary); +// Pause secondary index build after voting for commit. +const hangAfterVoteCommit = configureFailPoint(secondaryDB, "hangIndexBuildAfterSignalPrimaryForCommitReadiness"); + +jsTest.log.info("Waiting for index build to start"); +const createIdx = IndexBuildTest.startIndexBuild( + primary, + coll.getFullName(), + {a: 1}, + null, + /* expectedFailures */ [ErrorCodes.Interrupted], + /* commitQuorum */ 1, +); + +// Wait for the index build to start on both nodes. +const opId = IndexBuildTest.waitForIndexBuildToStart(testDB, coll.getName(), "a_1"); +IndexBuildTest.assertIndexBuildCurrentOpContents(testDB, opId); +const secondaryOpId = IndexBuildTest.waitForIndexBuildToStart(secondaryDB, coll.getName(), "a_1"); +IndexBuildTest.assertIndexBuildCurrentOpContents(secondaryDB, secondaryOpId); + +jsTest.log.info("Waiting for secondary to vote to commit the index"); +hangAfterVoteCommit.wait(); +IndexBuildTest.assertIndexesSoon(secondaryColl, 2, ["_id_", "a_1"]); + +// Primary should not consider commit quorum satisfied and still allow to abort. +IndexBuildTest.assertIndexesSoon(coll, 2, ["_id_", "a_1"]); +testDB.killOp(opId); + +jsTest.log.info("Waiting for index build to stop"); +IndexBuildTest.waitForIndexBuildToStop(testDB); +IndexBuildTest.assertIndexesSoon(coll, 1, ["_id_"]); + +IndexBuildTest.waitForIndexBuildToStop(secondaryDB); +IndexBuildTest.assertIndexesSoon(secondaryColl, 1, ["_id_"]); + +const exitCode = createIdx(); +assert.eq(0, exitCode, "expected shell to exit successfully"); + +rst.stopSet(); diff --git a/jstests/noPassthrough/index_builds/secondary_completes_index_build_after_stepup.js b/jstests/noPassthrough/index_builds/secondary_completes_index_build_after_stepup.js new file mode 100644 index 00000000000..2c311272f18 --- /dev/null +++ b/jstests/noPassthrough/index_builds/secondary_completes_index_build_after_stepup.js @@ -0,0 +1,91 @@ +/** + * Tests that if secondaries have voted but the primary has not, and if a secondary steps up and sees + * that commit quorum is satisfied, it proceeds to commit the index build. + * + * @tags: [ + * requires_commit_quorum, + * requires_replication, + * ] + */ +import {configureFailPoint} from "jstests/libs/fail_point_util.js"; +import {ReplSetTest} from "jstests/libs/replsettest.js"; +import {IndexBuildTest} from "jstests/noPassthrough/libs/index_builds/index_build.js"; + +const rst = new ReplSetTest({nodes: 2}); +rst.startSet(); +rst.initiate(); + +const primary = rst.getPrimary(); +const testDB = primary.getDB("test"); +const coll = testDB.getCollection("test"); + +assert.commandWorked(coll.insert({a: 1})); + +const secondary = rst.getSecondary(); +const secondaryDB = secondary.getDB(testDB.getName()); +const secondaryColl = secondaryDB.getCollection(coll.getName()); + +// Pause primary index build after starting. +IndexBuildTest.pauseIndexBuilds(primary); + +jsTest.log.info("Waiting for index build to start"); +const createIdx = IndexBuildTest.startIndexBuild( + primary, + coll.getFullName(), + {a: 1}, + null, + /* expectedFailures */ [ErrorCodes.InterruptedDueToReplStateChange], + /* commitQuorum */ 1, +); + +// Wait for the index build to start on both nodes. +const opId = IndexBuildTest.waitForIndexBuildToStart(testDB, coll.getName(), "a_1"); +IndexBuildTest.assertIndexBuildCurrentOpContents(testDB, opId); +IndexBuildTest.assertIndexesSoon(coll, 2, ["_id_", "a_1"]); + +const secondaryOpId = IndexBuildTest.waitForIndexBuildToStart(secondaryDB, coll.getName(), "a_1"); +IndexBuildTest.assertIndexBuildCurrentOpContents(secondaryDB, secondaryOpId); +IndexBuildTest.assertIndexesSoon(secondaryColl, 2, ["_id_", "a_1"]); + +// Before stepping down primary, make sure secondary pauses on step up. +const hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum = configureFailPoint( + secondaryDB, + "hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum", +); + +jsTest.log.info("Waiting for primary to step down"); +rst.awaitReplication(); +const stepDown = startParallelShell(() => { + assert.commandWorked(db.adminCommand({"replSetStepDown": 60, "force": false})); +}, primary.port); +// Wait for stepdown to complete. +stepDown(); + +// The index build on old primary will continue in the background. +const exitCode = createIdx(); +assert.eq(0, exitCode, "expected shell to exit successfully"); + +jsTest.log.info("Waiting for secondary to step up and satisfy commit quorum as new primary"); +hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum.wait(); + +// Resume index builds on both nodes. +IndexBuildTest.resumeIndexBuilds(primary); +hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum.off(); + +jsTest.log.info("Waiting for index build to stop"); +IndexBuildTest.waitForIndexBuildToStop(testDB); +IndexBuildTest.waitForIndexBuildToStop(secondaryDB); + +// Expect "Index build: completed successfully" in the log. +checkLog.containsJson(primary, 20663, { + namespace: coll.getFullName(), + indexesBuilt: ["a_1"], + numIndexesAfter: 2, +}); +checkLog.containsJson(secondary, 20663, { + namespace: coll.getFullName(), + indexesBuilt: ["a_1"], + numIndexesAfter: 2, +}); + +rst.stopSet(); diff --git a/jstests/replsets/libs/rollback_index_builds_test.js b/jstests/replsets/libs/rollback_index_builds_test.js index 4ca2b41f1de..45274002de2 100644 --- a/jstests/replsets/libs/rollback_index_builds_test.js +++ b/jstests/replsets/libs/rollback_index_builds_test.js @@ -111,7 +111,7 @@ export class RollbackIndexBuildsTest { var errcodes = self.expectedErrors ? self.expectedErrors : []; // This test creates indexes with majority of nodes not available for - // replication. So, disabling index build commit quorum. + // replication, so set index build commit quorum to 1. indexBuilds.push( IndexBuildTest.startIndexBuild( primary, @@ -119,7 +119,7 @@ export class RollbackIndexBuildsTest { indexSpec, {}, errcodes, - 0, + 1, ), ); diff --git a/src/mongo/db/index_builds/index_builds_coordinator.cpp b/src/mongo/db/index_builds/index_builds_coordinator.cpp index 13ccc3cd2fb..241f9c0d9f1 100644 --- a/src/mongo/db/index_builds/index_builds_coordinator.cpp +++ b/src/mongo/db/index_builds/index_builds_coordinator.cpp @@ -1593,7 +1593,7 @@ bool IndexBuildsCoordinator::abortIndexBuildByBuildUUID(OperationContext* opCtx, gFeatureFlagIntentRegistration.isEnabled()); // Override the 'signalAction' as this is an initial syncing node. - // Don't override it if it's a rollback abort which would be explictly requested + // Don't override it if it's a rollback abort which would be explicitly requested // by the initial sync code. auto replCoord = repl::ReplicationCoordinator::get(opCtx); if (replCoord->getMemberState().startup2() && diff --git a/src/mongo/db/index_builds/index_builds_coordinator.h b/src/mongo/db/index_builds/index_builds_coordinator.h index b5a157fcac0..5461fcf0946 100644 --- a/src/mongo/db/index_builds/index_builds_coordinator.h +++ b/src/mongo/db/index_builds/index_builds_coordinator.h @@ -680,7 +680,7 @@ protected: /** * Runs the index build on the caller thread. Handles unregistering the index build and setting * the index build's Promise with the outcome of the index build. - * 'IndexBuildOptios::replSetAndNotPrimary' is determined at the start of the index build. + * 'IndexBuildOptions::replSetAndNotPrimary' is determined at the start of the index build. */ void _runIndexBuild(OperationContext* opCtx, const UUID& buildUUID, @@ -809,7 +809,7 @@ protected: /** * Attempt to signal the index build to commit and advance the index build to the * kApplyCommitOplogEntry state. Returns true if successful and false if the attempt was - * unnecessful and the caller should retry. + * unsuccessful and the caller should retry. */ bool _tryCommit(OperationContext* opCtx, std::shared_ptr replState); /** diff --git a/src/mongo/db/index_builds/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds/index_builds_coordinator_mongod.cpp index 923f42c48ed..a59bfcc9586 100644 --- a/src/mongo/db/index_builds/index_builds_coordinator_mongod.cpp +++ b/src/mongo/db/index_builds/index_builds_coordinator_mongod.cpp @@ -678,7 +678,14 @@ bool IndexBuildsCoordinatorMongod::_signalIfCommitQuorumIsSatisfied( if (!voteMemberList) return false; - bool commitQuorumSatisfied = repl::ReplicationCoordinator::get(opCtx)->isCommitQuorumSatisfied( + const auto replCoord = repl::ReplicationCoordinator::get(opCtx); + if (std::find(voteMemberList->begin(), voteMemberList->end(), replCoord->getMyHostAndPort()) == + voteMemberList->end()) { + // Only after primary has committed can we proceed to check for commit quorum satisfied. + return false; + } + + bool commitQuorumSatisfied = replCoord->isCommitQuorumSatisfied( indexBuildEntry.getCommitQuorum(), voteMemberList.value()); if (!commitQuorumSatisfied) diff --git a/src/mongo/db/index_builds/index_builds_coordinator_mongod_test.cpp b/src/mongo/db/index_builds/index_builds_coordinator_mongod_test.cpp index d2fdc027a6f..39d7ff6d557 100644 --- a/src/mongo/db/index_builds/index_builds_coordinator_mongod_test.cpp +++ b/src/mongo/db/index_builds/index_builds_coordinator_mongod_test.cpp @@ -394,7 +394,9 @@ TEST_F(IndexBuildsCoordinatorMongodTest, SetCommitQuorumFailsToTurnCommitQuorumF ASSERT_EQUALS(ErrorCodes::BadValue, status); ASSERT_OK(_indexBuildsCoord->voteCommitIndexBuild( - operationContext(), buildUUID, HostAndPort("test1", 1234))); + operationContext(), + buildUUID, + repl::ReplicationCoordinator::get(operationContext())->getMyHostAndPort())); assertGet(testFoo1Future.getNoThrow()); } diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp index 9ad3799969b..73f9942b2b3 100644 --- a/src/mongo/db/repl/replication_coordinator_mock.cpp +++ b/src/mongo/db/repl/replication_coordinator_mock.cpp @@ -418,7 +418,8 @@ int ReplicationCoordinatorMock::getMyId() const { } HostAndPort ReplicationCoordinatorMock::getMyHostAndPort() const { - return HostAndPort(); + // Set to a non-empty value to satisfy the deserializer parser. + return HostAndPort("test1", 1234); } boost::optional ReplicationCoordinatorMock::getMyMaintenancePort() const {