mirror of https://github.com/mongodb/mongo
SERVER-115398 Differentiate between steady-state replication and startup recovery for lastTimeBetweenCommitOplogAndCommitMillis (#45204)
GitOrigin-RevId: 66e4c66764bc742b359cda41bbe4c125016456ad
This commit is contained in:
parent
1afd1b3547
commit
7296788d8f
|
|
@ -18,6 +18,9 @@ const dbName = jsTestName();
|
|||
const collName = "t";
|
||||
const lastTimeBetweenVoteAndCommitMillis = "lastTimeBetweenVoteAndCommitMillis";
|
||||
const lastTimeBetweenCommitOplogAndCommitMillis = "lastTimeBetweenCommitOplogAndCommitMillis";
|
||||
const lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery =
|
||||
"lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery";
|
||||
const lastTimeBetweenCommitOplogAndCommitMillisRestore = "lastTimeBetweenCommitOplogAndCommitMillisRestore";
|
||||
|
||||
function getIndexBuildServerStatusMetric(conn, metricName) {
|
||||
const serverStatus = conn.serverStatus();
|
||||
|
|
@ -40,6 +43,39 @@ function getIndexBuildServerStatusMetric(conn, metricName) {
|
|||
return phases[metricName];
|
||||
}
|
||||
|
||||
function checkMetrics(conn, metricNames, expectedValues, comparators) {
|
||||
assert.eq(metricNames.length, expectedValues.length);
|
||||
assert.eq(metricNames.length, comparators.length);
|
||||
|
||||
for (let i = 0; i < metricNames.length; i++) {
|
||||
const metricValue = getIndexBuildServerStatusMetric(conn, metricNames[i]);
|
||||
const expectedValue = expectedValues[i];
|
||||
const comparator = comparators[i];
|
||||
|
||||
if (comparator === "eq") {
|
||||
assert.eq(
|
||||
metricValue,
|
||||
expectedValue,
|
||||
`Expected value of ${metricNames[i]} to be ${expectedValue}, found ${metricValue}`,
|
||||
);
|
||||
} else if (comparator === "gte") {
|
||||
assert.gte(
|
||||
metricValue,
|
||||
expectedValue,
|
||||
`Expected value of ${metricNames[i]} to be greater than or equal to ${expectedValue}, found ${metricValue}`,
|
||||
);
|
||||
} else if (comparator === "gt") {
|
||||
assert.gt(
|
||||
metricValue,
|
||||
expectedValue,
|
||||
`Expected value of ${metricNames[i]} to be greater than ${expectedValue}, found ${metricValue}`,
|
||||
);
|
||||
} else {
|
||||
throw new Error(`Unexpected comparator: ${comparator}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
describe("indexBuilds serverStatus metrics", function () {
|
||||
before(() => {
|
||||
this.rst = new ReplSetTest({
|
||||
|
|
@ -78,16 +114,8 @@ describe("indexBuilds serverStatus metrics", function () {
|
|||
});
|
||||
|
||||
it("duration between voting to commit and committing", () => {
|
||||
let durationBetweenVotingToCommitAndCommitting = getIndexBuildServerStatusMetric(
|
||||
this.primaryDB,
|
||||
lastTimeBetweenVoteAndCommitMillis,
|
||||
);
|
||||
checkMetrics(this.primaryDB, [lastTimeBetweenVoteAndCommitMillis], [0], ["eq"]);
|
||||
|
||||
assert.eq(
|
||||
durationBetweenVotingToCommitAndCommitting,
|
||||
0,
|
||||
`Expected initial value of ${lastTimeBetweenVoteAndCommitMillis} to be 0, found ${durationBetweenVotingToCommitAndCommitting}`,
|
||||
);
|
||||
const fp = configureFailPoint(this.primaryDB, "hangIndexBuildAfterSignalPrimaryForCommitReadiness");
|
||||
const awaitCreateIndex = IndexBuildTest.startIndexBuild(this.primary, this.coll.getFullName(), {x: 1});
|
||||
|
||||
|
|
@ -100,25 +128,19 @@ describe("indexBuilds serverStatus metrics", function () {
|
|||
IndexBuildTest.waitForIndexBuildToStop(this.secondaryDB);
|
||||
awaitCreateIndex();
|
||||
|
||||
durationBetweenVotingToCommitAndCommitting = getIndexBuildServerStatusMetric(
|
||||
this.primaryDB,
|
||||
lastTimeBetweenVoteAndCommitMillis,
|
||||
);
|
||||
|
||||
assert.gte(
|
||||
durationBetweenVotingToCommitAndCommitting,
|
||||
1000,
|
||||
`Expected value of ${lastTimeBetweenVoteAndCommitMillis} to be greater than 1000, found ${durationBetweenVotingToCommitAndCommitting}`,
|
||||
);
|
||||
checkMetrics(this.primaryDB, [lastTimeBetweenVoteAndCommitMillis], [1000], ["gte"]);
|
||||
});
|
||||
|
||||
it("receive commitIndexBuild during steady state", () => {
|
||||
let duration = getIndexBuildServerStatusMetric(this.secondaryDB, lastTimeBetweenCommitOplogAndCommitMillis);
|
||||
|
||||
assert.eq(
|
||||
duration,
|
||||
0,
|
||||
`Expected initial value of ${lastTimeBetweenCommitOplogAndCommitMillis} to be 0, found ${duration}`,
|
||||
checkMetrics(
|
||||
this.secondaryDB,
|
||||
[
|
||||
lastTimeBetweenCommitOplogAndCommitMillis,
|
||||
lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery,
|
||||
lastTimeBetweenCommitOplogAndCommitMillisRestore,
|
||||
],
|
||||
[0, 0, 0],
|
||||
["eq", "eq", "eq"],
|
||||
);
|
||||
|
||||
const fp = configureFailPoint(this.secondary, "hangIndexBuildAfterReceivingCommitIndexBuildOplogEntry");
|
||||
|
|
@ -137,25 +159,28 @@ describe("indexBuilds serverStatus metrics", function () {
|
|||
|
||||
awaitCreateIndex();
|
||||
|
||||
duration = getIndexBuildServerStatusMetric(this.secondaryDB, lastTimeBetweenCommitOplogAndCommitMillis);
|
||||
|
||||
assert.gt(
|
||||
duration,
|
||||
1000,
|
||||
`Expected value of ${lastTimeBetweenCommitOplogAndCommitMillis} to be greater than 1000, found ${duration}`,
|
||||
checkMetrics(
|
||||
this.secondaryDB,
|
||||
[
|
||||
lastTimeBetweenCommitOplogAndCommitMillis,
|
||||
lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery,
|
||||
lastTimeBetweenCommitOplogAndCommitMillisRestore,
|
||||
],
|
||||
[1000, 0, 0],
|
||||
["gte", "eq", "eq"],
|
||||
);
|
||||
});
|
||||
|
||||
it("receive commitIndexBuild during startupRecovery", () => {
|
||||
let durationBetweenReceivingCommitIndexBuildEntryAndCommitting = getIndexBuildServerStatusMetric(
|
||||
checkMetrics(
|
||||
this.secondaryDB,
|
||||
[
|
||||
lastTimeBetweenCommitOplogAndCommitMillis,
|
||||
);
|
||||
|
||||
assert.eq(
|
||||
durationBetweenReceivingCommitIndexBuildEntryAndCommitting,
|
||||
0,
|
||||
`Expected initial value of ${lastTimeBetweenCommitOplogAndCommitMillis} to be 0, found ${durationBetweenReceivingCommitIndexBuildEntryAndCommitting}`,
|
||||
lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery,
|
||||
lastTimeBetweenCommitOplogAndCommitMillisRestore,
|
||||
],
|
||||
[0, 0, 0],
|
||||
["eq", "eq", "eq"],
|
||||
);
|
||||
|
||||
const hangAfterVotingFP = configureFailPoint(
|
||||
|
|
@ -176,7 +201,7 @@ describe("indexBuilds serverStatus metrics", function () {
|
|||
this.secondary,
|
||||
{
|
||||
setParameter: {
|
||||
"failpoint.hangIndexBuildBeforeCommit": tojson({
|
||||
"failpoint.hangIndexBuildAfterReceivingCommitIndexBuildOplogEntry": tojson({
|
||||
mode: {times: 1},
|
||||
}),
|
||||
},
|
||||
|
|
@ -189,15 +214,15 @@ describe("indexBuilds serverStatus metrics", function () {
|
|||
|
||||
awaitCreateIndex();
|
||||
|
||||
durationBetweenReceivingCommitIndexBuildEntryAndCommitting = getIndexBuildServerStatusMetric(
|
||||
checkMetrics(
|
||||
secondaryDBAfterRestart,
|
||||
[
|
||||
lastTimeBetweenCommitOplogAndCommitMillis,
|
||||
);
|
||||
|
||||
assert.gt(
|
||||
durationBetweenReceivingCommitIndexBuildEntryAndCommitting,
|
||||
0,
|
||||
`Expected value of ${lastTimeBetweenCommitOplogAndCommitMillis} to be greater than 0, found ${durationBetweenReceivingCommitIndexBuildEntryAndCommitting}`,
|
||||
lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery,
|
||||
lastTimeBetweenCommitOplogAndCommitMillisRestore,
|
||||
],
|
||||
[0, 0, 0],
|
||||
["eq", "gt", "eq"],
|
||||
);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -162,6 +162,10 @@ public:
|
|||
phases.append("lastCommittedMillis", lastCommittedMillis.loadRelaxed());
|
||||
phases.append("lastTimeBetweenCommitOplogAndCommitMillis",
|
||||
lastTimeBetweenCommitOplogAndCommitMillis.loadRelaxed());
|
||||
phases.append("lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery",
|
||||
lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery.loadRelaxed());
|
||||
phases.append("lastTimeBetweenCommitOplogAndCommitMillisRestore",
|
||||
lastTimeBetweenCommitOplogAndCommitMillisRestore.loadRelaxed());
|
||||
phases.append("lastTimeBetweenVoteAndCommitMillis",
|
||||
lastTimeBetweenVoteAndCommitMillis.loadRelaxed());
|
||||
phases.done();
|
||||
|
|
@ -195,8 +199,15 @@ public:
|
|||
// The duration of the last committed index build.
|
||||
AtomicWord<int64_t> lastCommittedMillis{0};
|
||||
// The duration between receiving the commitIndexBuild oplog entry and committing the index
|
||||
// build.
|
||||
// build for steady state replication for the last committed index build.
|
||||
AtomicWord<int64_t> lastTimeBetweenCommitOplogAndCommitMillis;
|
||||
// The duration between receiving the commitIndexBuild oplog entry and committing the index
|
||||
// build during startup recovery for the last committed index build.
|
||||
AtomicWord<int64_t> lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery;
|
||||
// The duration between receiving the commitIndexBuild oplog entry and committing the index
|
||||
// build for the last index build, when restoring a node using magic restore or when restoring
|
||||
// it from the oplog as a standalone node.
|
||||
AtomicWord<int64_t> lastTimeBetweenCommitOplogAndCommitMillisRestore;
|
||||
// The duration between voting to commit and committing the index build.
|
||||
AtomicWord<int64_t> lastTimeBetweenVoteAndCommitMillis;
|
||||
};
|
||||
|
|
@ -211,6 +222,12 @@ constexpr StringData kIndexesFieldName = "indexes"_sd;
|
|||
constexpr StringData kKeyFieldName = "key"_sd;
|
||||
constexpr StringData kUniqueFieldName = "unique"_sd;
|
||||
constexpr StringData kPrepareUniqueFieldName = "prepareUnique"_sd;
|
||||
constexpr StringData kLastTimeBetweenCommitOplogAndCommitMillis =
|
||||
"lastTimeBetweenCommitOplogAndCommitMillis"_sd;
|
||||
constexpr StringData kLastTimeBetweenCommitOplogAndCommitMillisStartupRecovery =
|
||||
"lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery"_sd;
|
||||
constexpr StringData kLastTimeBetweenCommitOplogAndCommitMillisRestore =
|
||||
"lastTimeBetweenCommitOplogAndCommitMillisRestore"_sd;
|
||||
|
||||
/**
|
||||
* Returns true if we should build the indexes an empty collection using the IndexCatalog and
|
||||
|
|
@ -597,17 +614,19 @@ void storeLastTimeBetweenVoteAndCommitMillis(const ReplIndexBuildState& replStat
|
|||
}
|
||||
|
||||
/**
|
||||
* Stores the duration between receiving the `commitIndexBuild` oplog entry and committing the
|
||||
* Stores the duration between receiving the `commitIndexBuild` oplog entry and committing the index
|
||||
* build, parameterized for the different scenarios that we could be applying the entry from.
|
||||
*/
|
||||
void storeLastTimeBetweenCommitOplogAndCommit(const ReplIndexBuildState& replState) {
|
||||
void storeLastTimeBetweenCommitOplogAndCommit(const ReplIndexBuildState& replState,
|
||||
StringData metricName,
|
||||
AtomicWord<int64_t>& metric) {
|
||||
const auto metrics = replState.getIndexBuildMetrics();
|
||||
const auto now = Date_t::now();
|
||||
tassert(11436300,
|
||||
"commitIndexOplogEntryTime was not set before setting "
|
||||
"lastTimeBetweenCommitOplogAndCommitMillis",
|
||||
str::stream() << "commitIndexOplogEntryTime was not set before setting " << metricName,
|
||||
metrics.commitIndexOplogEntryTime != Date_t::min());
|
||||
const auto elapsedTime = (now - metrics.commitIndexOplogEntryTime).count();
|
||||
indexBuildsSSS.lastTimeBetweenCommitOplogAndCommitMillis.store(elapsedTime);
|
||||
metric.store(elapsedTime);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
|
@ -1294,7 +1313,7 @@ void IndexBuildsCoordinator::applyCommitIndexBuild(OperationContext* opCtx,
|
|||
|
||||
auto swReplState = _getIndexBuild(buildUUID);
|
||||
auto replCoord = repl::ReplicationCoordinator::get(opCtx);
|
||||
|
||||
bool restartingPausedIndexInStandaloneOrRestore = false;
|
||||
// Index builds are not restarted in standalone mode. If the node is started with
|
||||
// recoverFromOplogAsStandalone or is in magic restore and when replaying the commitIndexBuild
|
||||
// oplog entry for a paused index, there is no active index build thread to commit.
|
||||
|
|
@ -1313,6 +1332,7 @@ void IndexBuildsCoordinator::applyCommitIndexBuild(OperationContext* opCtx,
|
|||
|
||||
// Get the builder.
|
||||
swReplState = _getIndexBuild(buildUUID);
|
||||
restartingPausedIndexInStandaloneOrRestore = true;
|
||||
}
|
||||
auto replState = uassertStatusOK(swReplState);
|
||||
replState->setMultikey(std::move(oplogEntry.multikey));
|
||||
|
|
@ -1333,7 +1353,22 @@ void IndexBuildsCoordinator::applyCommitIndexBuild(OperationContext* opCtx,
|
|||
"buildUUID"_attr = buildUUID,
|
||||
"waitResult"_attr = waitStatus,
|
||||
"status"_attr = buildStatus);
|
||||
storeLastTimeBetweenCommitOplogAndCommit(*replState);
|
||||
if (restartingPausedIndexInStandaloneOrRestore) {
|
||||
storeLastTimeBetweenCommitOplogAndCommit(
|
||||
*replState,
|
||||
kLastTimeBetweenCommitOplogAndCommitMillisRestore,
|
||||
indexBuildsSSS.lastTimeBetweenCommitOplogAndCommitMillisRestore);
|
||||
} else if (InReplicationRecovery::isSet(opCtx->getServiceContext())) {
|
||||
storeLastTimeBetweenCommitOplogAndCommit(
|
||||
*replState,
|
||||
kLastTimeBetweenCommitOplogAndCommitMillisStartupRecovery,
|
||||
indexBuildsSSS.lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery);
|
||||
} else {
|
||||
storeLastTimeBetweenCommitOplogAndCommit(
|
||||
*replState,
|
||||
kLastTimeBetweenCommitOplogAndCommitMillis,
|
||||
indexBuildsSSS.lastTimeBetweenCommitOplogAndCommitMillis);
|
||||
}
|
||||
|
||||
// Throws if there was an error building the index.
|
||||
fut.get();
|
||||
|
|
|
|||
Loading…
Reference in New Issue