mirror of https://github.com/mongodb/mongo
SERVER-103955 Primary only transitions to commitQuorumSatisfied after it has itself voted (#45227)
GitOrigin-RevId: 545fb6457f156a096fb804f73bcde18b8fc73fc7
This commit is contained in:
parent
89cabc7dbd
commit
9bc3459ce8
|
|
@ -105,6 +105,8 @@ last-continuous:
|
||||||
ticket: SERVER-86326
|
ticket: SERVER-86326
|
||||||
- test_file: jstests/core/query/boolean_simplifier_stress.js
|
- test_file: jstests/core/query/boolean_simplifier_stress.js
|
||||||
ticket: SERVER-114126
|
ticket: SERVER-114126
|
||||||
|
- test_file: jstests/replsets/rollback_index_build_start_abort.js
|
||||||
|
ticket: SERVER-103955
|
||||||
suites: null
|
suites: null
|
||||||
last-lts:
|
last-lts:
|
||||||
all:
|
all:
|
||||||
|
|
@ -712,4 +714,6 @@ last-lts:
|
||||||
ticket: SERVER-86326
|
ticket: SERVER-86326
|
||||||
- test_file: jstests/core/query/boolean_simplifier_stress.js
|
- test_file: jstests/core/query/boolean_simplifier_stress.js
|
||||||
ticket: SERVER-114126
|
ticket: SERVER-114126
|
||||||
|
- test_file: jstests/replsets/rollback_index_build_start_abort.js
|
||||||
|
ticket: SERVER-103955
|
||||||
suites: null
|
suites: null
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,68 @@
|
||||||
|
/**
|
||||||
|
* Tests that even when enough secondaries have voted to commit an index build, the primary does
|
||||||
|
* not consider the commit quorum satisfied if it has not itself completed. The index build can still
|
||||||
|
* be aborted on the primary instead of hanging indefinitely.
|
||||||
|
*
|
||||||
|
* @tags: [
|
||||||
|
* requires_commit_quorum,
|
||||||
|
* requires_replication,
|
||||||
|
* ]
|
||||||
|
*/
|
||||||
|
import {configureFailPoint} from "jstests/libs/fail_point_util.js";
|
||||||
|
import {ReplSetTest} from "jstests/libs/replsettest.js";
|
||||||
|
import {IndexBuildTest} from "jstests/noPassthrough/libs/index_builds/index_build.js";
|
||||||
|
|
||||||
|
const rst = new ReplSetTest({nodes: 2});
|
||||||
|
rst.startSet();
|
||||||
|
rst.initiate();
|
||||||
|
|
||||||
|
const primary = rst.getPrimary();
|
||||||
|
const testDB = primary.getDB("test");
|
||||||
|
const coll = testDB.getCollection("test");
|
||||||
|
|
||||||
|
assert.commandWorked(coll.insert({a: 1}));
|
||||||
|
|
||||||
|
const secondary = rst.getSecondary();
|
||||||
|
const secondaryDB = secondary.getDB(testDB.getName());
|
||||||
|
const secondaryColl = secondaryDB.getCollection(coll.getName());
|
||||||
|
|
||||||
|
// Pause primary index build after starting.
|
||||||
|
IndexBuildTest.pauseIndexBuilds(primary);
|
||||||
|
// Pause secondary index build after voting for commit.
|
||||||
|
const hangAfterVoteCommit = configureFailPoint(secondaryDB, "hangIndexBuildAfterSignalPrimaryForCommitReadiness");
|
||||||
|
|
||||||
|
jsTest.log.info("Waiting for index build to start");
|
||||||
|
const createIdx = IndexBuildTest.startIndexBuild(
|
||||||
|
primary,
|
||||||
|
coll.getFullName(),
|
||||||
|
{a: 1},
|
||||||
|
null,
|
||||||
|
/* expectedFailures */ [ErrorCodes.Interrupted],
|
||||||
|
/* commitQuorum */ 1,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Wait for the index build to start on both nodes.
|
||||||
|
const opId = IndexBuildTest.waitForIndexBuildToStart(testDB, coll.getName(), "a_1");
|
||||||
|
IndexBuildTest.assertIndexBuildCurrentOpContents(testDB, opId);
|
||||||
|
const secondaryOpId = IndexBuildTest.waitForIndexBuildToStart(secondaryDB, coll.getName(), "a_1");
|
||||||
|
IndexBuildTest.assertIndexBuildCurrentOpContents(secondaryDB, secondaryOpId);
|
||||||
|
|
||||||
|
jsTest.log.info("Waiting for secondary to vote to commit the index");
|
||||||
|
hangAfterVoteCommit.wait();
|
||||||
|
IndexBuildTest.assertIndexesSoon(secondaryColl, 2, ["_id_", "a_1"]);
|
||||||
|
|
||||||
|
// The primary should not consider the commit quorum satisfied, so the build can still be aborted.
|
||||||
|
IndexBuildTest.assertIndexesSoon(coll, 2, ["_id_", "a_1"]);
|
||||||
|
testDB.killOp(opId);
|
||||||
|
|
||||||
|
jsTest.log.info("Waiting for index build to stop");
|
||||||
|
IndexBuildTest.waitForIndexBuildToStop(testDB);
|
||||||
|
IndexBuildTest.assertIndexesSoon(coll, 1, ["_id_"]);
|
||||||
|
|
||||||
|
IndexBuildTest.waitForIndexBuildToStop(secondaryDB);
|
||||||
|
IndexBuildTest.assertIndexesSoon(secondaryColl, 1, ["_id_"]);
|
||||||
|
|
||||||
|
const exitCode = createIdx();
|
||||||
|
assert.eq(0, exitCode, "expected shell to exit successfully");
|
||||||
|
|
||||||
|
rst.stopSet();
|
||||||
|
|
@ -0,0 +1,91 @@
|
||||||
|
/**
|
||||||
|
* Tests that if secondaries have voted but the primary has not, and a secondary then steps up and sees
|
||||||
|
* that commit quorum is satisfied, it proceeds to commit the index build.
|
||||||
|
*
|
||||||
|
* @tags: [
|
||||||
|
* requires_commit_quorum,
|
||||||
|
* requires_replication,
|
||||||
|
* ]
|
||||||
|
*/
|
||||||
|
import {configureFailPoint} from "jstests/libs/fail_point_util.js";
|
||||||
|
import {ReplSetTest} from "jstests/libs/replsettest.js";
|
||||||
|
import {IndexBuildTest} from "jstests/noPassthrough/libs/index_builds/index_build.js";
|
||||||
|
|
||||||
|
const rst = new ReplSetTest({nodes: 2});
|
||||||
|
rst.startSet();
|
||||||
|
rst.initiate();
|
||||||
|
|
||||||
|
const primary = rst.getPrimary();
|
||||||
|
const testDB = primary.getDB("test");
|
||||||
|
const coll = testDB.getCollection("test");
|
||||||
|
|
||||||
|
assert.commandWorked(coll.insert({a: 1}));
|
||||||
|
|
||||||
|
const secondary = rst.getSecondary();
|
||||||
|
const secondaryDB = secondary.getDB(testDB.getName());
|
||||||
|
const secondaryColl = secondaryDB.getCollection(coll.getName());
|
||||||
|
|
||||||
|
// Pause primary index build after starting.
|
||||||
|
IndexBuildTest.pauseIndexBuilds(primary);
|
||||||
|
|
||||||
|
jsTest.log.info("Waiting for index build to start");
|
||||||
|
const createIdx = IndexBuildTest.startIndexBuild(
|
||||||
|
primary,
|
||||||
|
coll.getFullName(),
|
||||||
|
{a: 1},
|
||||||
|
null,
|
||||||
|
/* expectedFailures */ [ErrorCodes.InterruptedDueToReplStateChange],
|
||||||
|
/* commitQuorum */ 1,
|
||||||
|
);
|
||||||
|
|
||||||
|
// Wait for the index build to start on both nodes.
|
||||||
|
const opId = IndexBuildTest.waitForIndexBuildToStart(testDB, coll.getName(), "a_1");
|
||||||
|
IndexBuildTest.assertIndexBuildCurrentOpContents(testDB, opId);
|
||||||
|
IndexBuildTest.assertIndexesSoon(coll, 2, ["_id_", "a_1"]);
|
||||||
|
|
||||||
|
const secondaryOpId = IndexBuildTest.waitForIndexBuildToStart(secondaryDB, coll.getName(), "a_1");
|
||||||
|
IndexBuildTest.assertIndexBuildCurrentOpContents(secondaryDB, secondaryOpId);
|
||||||
|
IndexBuildTest.assertIndexesSoon(secondaryColl, 2, ["_id_", "a_1"]);
|
||||||
|
|
||||||
|
// Before stepping down primary, make sure secondary pauses on step up.
|
||||||
|
const hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum = configureFailPoint(
|
||||||
|
secondaryDB,
|
||||||
|
"hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum",
|
||||||
|
);
|
||||||
|
|
||||||
|
jsTest.log.info("Waiting for primary to step down");
|
||||||
|
rst.awaitReplication();
|
||||||
|
const stepDown = startParallelShell(() => {
|
||||||
|
assert.commandWorked(db.adminCommand({"replSetStepDown": 60, "force": false}));
|
||||||
|
}, primary.port);
|
||||||
|
// Wait for stepdown to complete.
|
||||||
|
stepDown();
|
||||||
|
|
||||||
|
// The index build on old primary will continue in the background.
|
||||||
|
const exitCode = createIdx();
|
||||||
|
assert.eq(0, exitCode, "expected shell to exit successfully");
|
||||||
|
|
||||||
|
jsTest.log.info("Waiting for secondary to step up and satisfy commit quorum as new primary");
|
||||||
|
hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum.wait();
|
||||||
|
|
||||||
|
// Resume index builds on both nodes.
|
||||||
|
IndexBuildTest.resumeIndexBuilds(primary);
|
||||||
|
hangOnStepUpAsyncTaskBeforeCheckingCommitQuorum.off();
|
||||||
|
|
||||||
|
jsTest.log.info("Waiting for index build to stop");
|
||||||
|
IndexBuildTest.waitForIndexBuildToStop(testDB);
|
||||||
|
IndexBuildTest.waitForIndexBuildToStop(secondaryDB);
|
||||||
|
|
||||||
|
// Expect "Index build: completed successfully" in the log.
|
||||||
|
checkLog.containsJson(primary, 20663, {
|
||||||
|
namespace: coll.getFullName(),
|
||||||
|
indexesBuilt: ["a_1"],
|
||||||
|
numIndexesAfter: 2,
|
||||||
|
});
|
||||||
|
checkLog.containsJson(secondary, 20663, {
|
||||||
|
namespace: coll.getFullName(),
|
||||||
|
indexesBuilt: ["a_1"],
|
||||||
|
numIndexesAfter: 2,
|
||||||
|
});
|
||||||
|
|
||||||
|
rst.stopSet();
|
||||||
|
|
@ -111,7 +111,7 @@ export class RollbackIndexBuildsTest {
|
||||||
|
|
||||||
var errcodes = self.expectedErrors ? self.expectedErrors : [];
|
var errcodes = self.expectedErrors ? self.expectedErrors : [];
|
||||||
// This test creates indexes with majority of nodes not available for
|
// This test creates indexes with majority of nodes not available for
|
||||||
// replication. So, disabling index build commit quorum.
|
// replication, so set index build commit quorum to 1.
|
||||||
indexBuilds.push(
|
indexBuilds.push(
|
||||||
IndexBuildTest.startIndexBuild(
|
IndexBuildTest.startIndexBuild(
|
||||||
primary,
|
primary,
|
||||||
|
|
@ -119,7 +119,7 @@ export class RollbackIndexBuildsTest {
|
||||||
indexSpec,
|
indexSpec,
|
||||||
{},
|
{},
|
||||||
errcodes,
|
errcodes,
|
||||||
0,
|
1,
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1593,7 +1593,7 @@ bool IndexBuildsCoordinator::abortIndexBuildByBuildUUID(OperationContext* opCtx,
|
||||||
gFeatureFlagIntentRegistration.isEnabled());
|
gFeatureFlagIntentRegistration.isEnabled());
|
||||||
|
|
||||||
// Override the 'signalAction' as this is an initial syncing node.
|
// Override the 'signalAction' as this is an initial syncing node.
|
||||||
// Don't override it if it's a rollback abort which would be explictly requested
|
// Don't override it if it's a rollback abort which would be explicitly requested
|
||||||
// by the initial sync code.
|
// by the initial sync code.
|
||||||
auto replCoord = repl::ReplicationCoordinator::get(opCtx);
|
auto replCoord = repl::ReplicationCoordinator::get(opCtx);
|
||||||
if (replCoord->getMemberState().startup2() &&
|
if (replCoord->getMemberState().startup2() &&
|
||||||
|
|
|
||||||
|
|
@ -680,7 +680,7 @@ protected:
|
||||||
/**
|
/**
|
||||||
* Runs the index build on the caller thread. Handles unregistering the index build and setting
|
* Runs the index build on the caller thread. Handles unregistering the index build and setting
|
||||||
* the index build's Promise with the outcome of the index build.
|
* the index build's Promise with the outcome of the index build.
|
||||||
* 'IndexBuildOptios::replSetAndNotPrimary' is determined at the start of the index build.
|
* 'IndexBuildOptions::replSetAndNotPrimary' is determined at the start of the index build.
|
||||||
*/
|
*/
|
||||||
void _runIndexBuild(OperationContext* opCtx,
|
void _runIndexBuild(OperationContext* opCtx,
|
||||||
const UUID& buildUUID,
|
const UUID& buildUUID,
|
||||||
|
|
@ -809,7 +809,7 @@ protected:
|
||||||
/**
|
/**
|
||||||
* Attempt to signal the index build to commit and advance the index build to the
|
* Attempt to signal the index build to commit and advance the index build to the
|
||||||
* kApplyCommitOplogEntry state. Returns true if successful and false if the attempt was
|
* kApplyCommitOplogEntry state. Returns true if successful and false if the attempt was
|
||||||
* unnecessful and the caller should retry.
|
* unsuccessful and the caller should retry.
|
||||||
*/
|
*/
|
||||||
bool _tryCommit(OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState);
|
bool _tryCommit(OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState);
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -678,7 +678,14 @@ bool IndexBuildsCoordinatorMongod::_signalIfCommitQuorumIsSatisfied(
|
||||||
if (!voteMemberList)
|
if (!voteMemberList)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
bool commitQuorumSatisfied = repl::ReplicationCoordinator::get(opCtx)->isCommitQuorumSatisfied(
|
const auto replCoord = repl::ReplicationCoordinator::get(opCtx);
|
||||||
|
if (std::find(voteMemberList->begin(), voteMemberList->end(), replCoord->getMyHostAndPort()) ==
|
||||||
|
voteMemberList->end()) {
|
||||||
|
// Only after the primary itself has voted can we proceed to check whether commit quorum is satisfied.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool commitQuorumSatisfied = replCoord->isCommitQuorumSatisfied(
|
||||||
indexBuildEntry.getCommitQuorum(), voteMemberList.value());
|
indexBuildEntry.getCommitQuorum(), voteMemberList.value());
|
||||||
|
|
||||||
if (!commitQuorumSatisfied)
|
if (!commitQuorumSatisfied)
|
||||||
|
|
|
||||||
|
|
@ -394,7 +394,9 @@ TEST_F(IndexBuildsCoordinatorMongodTest, SetCommitQuorumFailsToTurnCommitQuorumF
|
||||||
ASSERT_EQUALS(ErrorCodes::BadValue, status);
|
ASSERT_EQUALS(ErrorCodes::BadValue, status);
|
||||||
|
|
||||||
ASSERT_OK(_indexBuildsCoord->voteCommitIndexBuild(
|
ASSERT_OK(_indexBuildsCoord->voteCommitIndexBuild(
|
||||||
operationContext(), buildUUID, HostAndPort("test1", 1234)));
|
operationContext(),
|
||||||
|
buildUUID,
|
||||||
|
repl::ReplicationCoordinator::get(operationContext())->getMyHostAndPort()));
|
||||||
|
|
||||||
assertGet(testFoo1Future.getNoThrow());
|
assertGet(testFoo1Future.getNoThrow());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -418,7 +418,8 @@ int ReplicationCoordinatorMock::getMyId() const {
|
||||||
}
|
}
|
||||||
|
|
||||||
HostAndPort ReplicationCoordinatorMock::getMyHostAndPort() const {
|
HostAndPort ReplicationCoordinatorMock::getMyHostAndPort() const {
|
||||||
return HostAndPort();
|
// Set to a non-empty value to satisfy the deserializer parser.
|
||||||
|
return HostAndPort("test1", 1234);
|
||||||
}
|
}
|
||||||
|
|
||||||
boost::optional<int> ReplicationCoordinatorMock::getMyMaintenancePort() const {
|
boost::optional<int> ReplicationCoordinatorMock::getMyMaintenancePort() const {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue