mirror of https://github.com/mongodb/mongo
SERVER-115398 Differentiate between steady-state replication and startup recovery for lastTimeBetweenCommitOplogAndCommitMillis (#45204)
GitOrigin-RevId: 66e4c66764bc742b359cda41bbe4c125016456ad
This commit is contained in:
parent
1afd1b3547
commit
7296788d8f
|
|
@ -18,6 +18,9 @@ const dbName = jsTestName();
|
||||||
const collName = "t";
|
const collName = "t";
|
||||||
const lastTimeBetweenVoteAndCommitMillis = "lastTimeBetweenVoteAndCommitMillis";
|
const lastTimeBetweenVoteAndCommitMillis = "lastTimeBetweenVoteAndCommitMillis";
|
||||||
const lastTimeBetweenCommitOplogAndCommitMillis = "lastTimeBetweenCommitOplogAndCommitMillis";
|
const lastTimeBetweenCommitOplogAndCommitMillis = "lastTimeBetweenCommitOplogAndCommitMillis";
|
||||||
|
const lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery =
|
||||||
|
"lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery";
|
||||||
|
const lastTimeBetweenCommitOplogAndCommitMillisRestore = "lastTimeBetweenCommitOplogAndCommitMillisRestore";
|
||||||
|
|
||||||
function getIndexBuildServerStatusMetric(conn, metricName) {
|
function getIndexBuildServerStatusMetric(conn, metricName) {
|
||||||
const serverStatus = conn.serverStatus();
|
const serverStatus = conn.serverStatus();
|
||||||
|
|
@ -40,6 +43,39 @@ function getIndexBuildServerStatusMetric(conn, metricName) {
|
||||||
return phases[metricName];
|
return phases[metricName];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function checkMetrics(conn, metricNames, expectedValues, comparators) {
|
||||||
|
assert.eq(metricNames.length, expectedValues.length);
|
||||||
|
assert.eq(metricNames.length, comparators.length);
|
||||||
|
|
||||||
|
for (let i = 0; i < metricNames.length; i++) {
|
||||||
|
const metricValue = getIndexBuildServerStatusMetric(conn, metricNames[i]);
|
||||||
|
const expectedValue = expectedValues[i];
|
||||||
|
const comparator = comparators[i];
|
||||||
|
|
||||||
|
if (comparator === "eq") {
|
||||||
|
assert.eq(
|
||||||
|
metricValue,
|
||||||
|
expectedValue,
|
||||||
|
`Expected value of ${metricNames[i]} to be ${expectedValue}, found ${metricValue}`,
|
||||||
|
);
|
||||||
|
} else if (comparator === "gte") {
|
||||||
|
assert.gte(
|
||||||
|
metricValue,
|
||||||
|
expectedValue,
|
||||||
|
`Expected value of ${metricNames[i]} to be greater than or equal to ${expectedValue}, found ${metricValue}`,
|
||||||
|
);
|
||||||
|
} else if (comparator === "gt") {
|
||||||
|
assert.gt(
|
||||||
|
metricValue,
|
||||||
|
expectedValue,
|
||||||
|
`Expected value of ${metricNames[i]} to be greater than ${expectedValue}, found ${metricValue}`,
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
throw new Error(`Unexpected comparator: ${comparator}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
describe("indexBuilds serverStatus metrics", function () {
|
describe("indexBuilds serverStatus metrics", function () {
|
||||||
before(() => {
|
before(() => {
|
||||||
this.rst = new ReplSetTest({
|
this.rst = new ReplSetTest({
|
||||||
|
|
@ -78,16 +114,8 @@ describe("indexBuilds serverStatus metrics", function () {
|
||||||
});
|
});
|
||||||
|
|
||||||
it("duration between voting to commit and committing", () => {
|
it("duration between voting to commit and committing", () => {
|
||||||
let durationBetweenVotingToCommitAndCommitting = getIndexBuildServerStatusMetric(
|
checkMetrics(this.primaryDB, [lastTimeBetweenVoteAndCommitMillis], [0], ["eq"]);
|
||||||
this.primaryDB,
|
|
||||||
lastTimeBetweenVoteAndCommitMillis,
|
|
||||||
);
|
|
||||||
|
|
||||||
assert.eq(
|
|
||||||
durationBetweenVotingToCommitAndCommitting,
|
|
||||||
0,
|
|
||||||
`Expected initial value of ${lastTimeBetweenVoteAndCommitMillis} to be 0, found ${durationBetweenVotingToCommitAndCommitting}`,
|
|
||||||
);
|
|
||||||
const fp = configureFailPoint(this.primaryDB, "hangIndexBuildAfterSignalPrimaryForCommitReadiness");
|
const fp = configureFailPoint(this.primaryDB, "hangIndexBuildAfterSignalPrimaryForCommitReadiness");
|
||||||
const awaitCreateIndex = IndexBuildTest.startIndexBuild(this.primary, this.coll.getFullName(), {x: 1});
|
const awaitCreateIndex = IndexBuildTest.startIndexBuild(this.primary, this.coll.getFullName(), {x: 1});
|
||||||
|
|
||||||
|
|
@ -100,25 +128,19 @@ describe("indexBuilds serverStatus metrics", function () {
|
||||||
IndexBuildTest.waitForIndexBuildToStop(this.secondaryDB);
|
IndexBuildTest.waitForIndexBuildToStop(this.secondaryDB);
|
||||||
awaitCreateIndex();
|
awaitCreateIndex();
|
||||||
|
|
||||||
durationBetweenVotingToCommitAndCommitting = getIndexBuildServerStatusMetric(
|
checkMetrics(this.primaryDB, [lastTimeBetweenVoteAndCommitMillis], [1000], ["gte"]);
|
||||||
this.primaryDB,
|
|
||||||
lastTimeBetweenVoteAndCommitMillis,
|
|
||||||
);
|
|
||||||
|
|
||||||
assert.gte(
|
|
||||||
durationBetweenVotingToCommitAndCommitting,
|
|
||||||
1000,
|
|
||||||
`Expected value of ${lastTimeBetweenVoteAndCommitMillis} to be greater than 1000, found ${durationBetweenVotingToCommitAndCommitting}`,
|
|
||||||
);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
it("receive commitIndexBuild during steady state", () => {
|
it("receive commitIndexBuild during steady state", () => {
|
||||||
let duration = getIndexBuildServerStatusMetric(this.secondaryDB, lastTimeBetweenCommitOplogAndCommitMillis);
|
checkMetrics(
|
||||||
|
this.secondaryDB,
|
||||||
assert.eq(
|
[
|
||||||
duration,
|
lastTimeBetweenCommitOplogAndCommitMillis,
|
||||||
0,
|
lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery,
|
||||||
`Expected initial value of ${lastTimeBetweenCommitOplogAndCommitMillis} to be 0, found ${duration}`,
|
lastTimeBetweenCommitOplogAndCommitMillisRestore,
|
||||||
|
],
|
||||||
|
[0, 0, 0],
|
||||||
|
["eq", "eq", "eq"],
|
||||||
);
|
);
|
||||||
|
|
||||||
const fp = configureFailPoint(this.secondary, "hangIndexBuildAfterReceivingCommitIndexBuildOplogEntry");
|
const fp = configureFailPoint(this.secondary, "hangIndexBuildAfterReceivingCommitIndexBuildOplogEntry");
|
||||||
|
|
@ -137,25 +159,28 @@ describe("indexBuilds serverStatus metrics", function () {
|
||||||
|
|
||||||
awaitCreateIndex();
|
awaitCreateIndex();
|
||||||
|
|
||||||
duration = getIndexBuildServerStatusMetric(this.secondaryDB, lastTimeBetweenCommitOplogAndCommitMillis);
|
checkMetrics(
|
||||||
|
this.secondaryDB,
|
||||||
assert.gt(
|
[
|
||||||
duration,
|
lastTimeBetweenCommitOplogAndCommitMillis,
|
||||||
1000,
|
lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery,
|
||||||
`Expected value of ${lastTimeBetweenCommitOplogAndCommitMillis} to be greater than 1000, found ${duration}`,
|
lastTimeBetweenCommitOplogAndCommitMillisRestore,
|
||||||
|
],
|
||||||
|
[1000, 0, 0],
|
||||||
|
["gte", "eq", "eq"],
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
it("receive commitIndexBuild during startupRecovery", () => {
|
it("receive commitIndexBuild during startupRecovery", () => {
|
||||||
let durationBetweenReceivingCommitIndexBuildEntryAndCommitting = getIndexBuildServerStatusMetric(
|
checkMetrics(
|
||||||
this.secondaryDB,
|
this.secondaryDB,
|
||||||
lastTimeBetweenCommitOplogAndCommitMillis,
|
[
|
||||||
);
|
lastTimeBetweenCommitOplogAndCommitMillis,
|
||||||
|
lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery,
|
||||||
assert.eq(
|
lastTimeBetweenCommitOplogAndCommitMillisRestore,
|
||||||
durationBetweenReceivingCommitIndexBuildEntryAndCommitting,
|
],
|
||||||
0,
|
[0, 0, 0],
|
||||||
`Expected initial value of ${lastTimeBetweenCommitOplogAndCommitMillis} to be 0, found ${durationBetweenReceivingCommitIndexBuildEntryAndCommitting}`,
|
["eq", "eq", "eq"],
|
||||||
);
|
);
|
||||||
|
|
||||||
const hangAfterVotingFP = configureFailPoint(
|
const hangAfterVotingFP = configureFailPoint(
|
||||||
|
|
@ -176,7 +201,7 @@ describe("indexBuilds serverStatus metrics", function () {
|
||||||
this.secondary,
|
this.secondary,
|
||||||
{
|
{
|
||||||
setParameter: {
|
setParameter: {
|
||||||
"failpoint.hangIndexBuildBeforeCommit": tojson({
|
"failpoint.hangIndexBuildAfterReceivingCommitIndexBuildOplogEntry": tojson({
|
||||||
mode: {times: 1},
|
mode: {times: 1},
|
||||||
}),
|
}),
|
||||||
},
|
},
|
||||||
|
|
@ -189,15 +214,15 @@ describe("indexBuilds serverStatus metrics", function () {
|
||||||
|
|
||||||
awaitCreateIndex();
|
awaitCreateIndex();
|
||||||
|
|
||||||
durationBetweenReceivingCommitIndexBuildEntryAndCommitting = getIndexBuildServerStatusMetric(
|
checkMetrics(
|
||||||
secondaryDBAfterRestart,
|
secondaryDBAfterRestart,
|
||||||
lastTimeBetweenCommitOplogAndCommitMillis,
|
[
|
||||||
);
|
lastTimeBetweenCommitOplogAndCommitMillis,
|
||||||
|
lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery,
|
||||||
assert.gt(
|
lastTimeBetweenCommitOplogAndCommitMillisRestore,
|
||||||
durationBetweenReceivingCommitIndexBuildEntryAndCommitting,
|
],
|
||||||
0,
|
[0, 0, 0],
|
||||||
`Expected value of ${lastTimeBetweenCommitOplogAndCommitMillis} to be greater than 0, found ${durationBetweenReceivingCommitIndexBuildEntryAndCommitting}`,
|
["eq", "gt", "eq"],
|
||||||
);
|
);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -162,6 +162,10 @@ public:
|
||||||
phases.append("lastCommittedMillis", lastCommittedMillis.loadRelaxed());
|
phases.append("lastCommittedMillis", lastCommittedMillis.loadRelaxed());
|
||||||
phases.append("lastTimeBetweenCommitOplogAndCommitMillis",
|
phases.append("lastTimeBetweenCommitOplogAndCommitMillis",
|
||||||
lastTimeBetweenCommitOplogAndCommitMillis.loadRelaxed());
|
lastTimeBetweenCommitOplogAndCommitMillis.loadRelaxed());
|
||||||
|
phases.append("lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery",
|
||||||
|
lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery.loadRelaxed());
|
||||||
|
phases.append("lastTimeBetweenCommitOplogAndCommitMillisRestore",
|
||||||
|
lastTimeBetweenCommitOplogAndCommitMillisRestore.loadRelaxed());
|
||||||
phases.append("lastTimeBetweenVoteAndCommitMillis",
|
phases.append("lastTimeBetweenVoteAndCommitMillis",
|
||||||
lastTimeBetweenVoteAndCommitMillis.loadRelaxed());
|
lastTimeBetweenVoteAndCommitMillis.loadRelaxed());
|
||||||
phases.done();
|
phases.done();
|
||||||
|
|
@ -195,8 +199,15 @@ public:
|
||||||
// The duration of the last committed index build.
|
// The duration of the last committed index build.
|
||||||
AtomicWord<int64_t> lastCommittedMillis{0};
|
AtomicWord<int64_t> lastCommittedMillis{0};
|
||||||
// The duration between receiving the commitIndexBuild oplog entry and committing the index
|
// The duration between receiving the commitIndexBuild oplog entry and committing the index
|
||||||
// build.
|
// build for steady state replication for the last committed index build.
|
||||||
AtomicWord<int64_t> lastTimeBetweenCommitOplogAndCommitMillis;
|
AtomicWord<int64_t> lastTimeBetweenCommitOplogAndCommitMillis;
|
||||||
|
// The duration between receiving the commitIndexBuild oplog entry and committing the index
|
||||||
|
// build during startup recovery for the last committed index build.
|
||||||
|
AtomicWord<int64_t> lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery;
|
||||||
|
// The duration between receiving the commitIndexBuild oplog entry and committing the index
|
||||||
|
// build for the last index build, when restoring a node using magic restore or when restoring
|
||||||
|
// it from the oplog as a standalone node.
|
||||||
|
AtomicWord<int64_t> lastTimeBetweenCommitOplogAndCommitMillisRestore;
|
||||||
// The duration between voting to commit and committing the index build.
|
// The duration between voting to commit and committing the index build.
|
||||||
AtomicWord<int64_t> lastTimeBetweenVoteAndCommitMillis;
|
AtomicWord<int64_t> lastTimeBetweenVoteAndCommitMillis;
|
||||||
};
|
};
|
||||||
|
|
@ -211,6 +222,12 @@ constexpr StringData kIndexesFieldName = "indexes"_sd;
|
||||||
constexpr StringData kKeyFieldName = "key"_sd;
|
constexpr StringData kKeyFieldName = "key"_sd;
|
||||||
constexpr StringData kUniqueFieldName = "unique"_sd;
|
constexpr StringData kUniqueFieldName = "unique"_sd;
|
||||||
constexpr StringData kPrepareUniqueFieldName = "prepareUnique"_sd;
|
constexpr StringData kPrepareUniqueFieldName = "prepareUnique"_sd;
|
||||||
|
constexpr StringData kLastTimeBetweenCommitOplogAndCommitMillis =
|
||||||
|
"lastTimeBetweenCommitOplogAndCommitMillis"_sd;
|
||||||
|
constexpr StringData kLastTimeBetweenCommitOplogAndCommitMillisStartupRecovery =
|
||||||
|
"lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery"_sd;
|
||||||
|
constexpr StringData kLastTimeBetweenCommitOplogAndCommitMillisRestore =
|
||||||
|
"lastTimeBetweenCommitOplogAndCommitMillisRestore"_sd;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns true if we should build the indexes an empty collection using the IndexCatalog and
|
* Returns true if we should build the indexes an empty collection using the IndexCatalog and
|
||||||
|
|
@ -597,17 +614,19 @@ void storeLastTimeBetweenVoteAndCommitMillis(const ReplIndexBuildState& replStat
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stores the duration between receiving the `commitIndexBuild` oplog entry and committing the
|
* Stores the duration between receiving the `commitIndexBuild` oplog entry and committing the index
|
||||||
|
* build, parameterized for the different scenarios that we could be applying the entry from.
|
||||||
*/
|
*/
|
||||||
void storeLastTimeBetweenCommitOplogAndCommit(const ReplIndexBuildState& replState) {
|
void storeLastTimeBetweenCommitOplogAndCommit(const ReplIndexBuildState& replState,
|
||||||
|
StringData metricName,
|
||||||
|
AtomicWord<int64_t>& metric) {
|
||||||
const auto metrics = replState.getIndexBuildMetrics();
|
const auto metrics = replState.getIndexBuildMetrics();
|
||||||
const auto now = Date_t::now();
|
const auto now = Date_t::now();
|
||||||
tassert(11436300,
|
tassert(11436300,
|
||||||
"commitIndexOplogEntryTime was not set before setting "
|
str::stream() << "commitIndexOplogEntryTime was not set before setting " << metricName,
|
||||||
"lastTimeBetweenCommitOplogAndCommitMillis",
|
|
||||||
metrics.commitIndexOplogEntryTime != Date_t::min());
|
metrics.commitIndexOplogEntryTime != Date_t::min());
|
||||||
const auto elapsedTime = (now - metrics.commitIndexOplogEntryTime).count();
|
const auto elapsedTime = (now - metrics.commitIndexOplogEntryTime).count();
|
||||||
indexBuildsSSS.lastTimeBetweenCommitOplogAndCommitMillis.store(elapsedTime);
|
metric.store(elapsedTime);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
@ -1294,7 +1313,7 @@ void IndexBuildsCoordinator::applyCommitIndexBuild(OperationContext* opCtx,
|
||||||
|
|
||||||
auto swReplState = _getIndexBuild(buildUUID);
|
auto swReplState = _getIndexBuild(buildUUID);
|
||||||
auto replCoord = repl::ReplicationCoordinator::get(opCtx);
|
auto replCoord = repl::ReplicationCoordinator::get(opCtx);
|
||||||
|
bool restartingPausedIndexInStandaloneOrRestore = false;
|
||||||
// Index builds are not restarted in standalone mode. If the node is started with
|
// Index builds are not restarted in standalone mode. If the node is started with
|
||||||
// recoverFromOplogAsStandalone or is in magic restore and when replaying the commitIndexBuild
|
// recoverFromOplogAsStandalone or is in magic restore and when replaying the commitIndexBuild
|
||||||
// oplog entry for a paused index, there is no active index build thread to commit.
|
// oplog entry for a paused index, there is no active index build thread to commit.
|
||||||
|
|
@ -1313,6 +1332,7 @@ void IndexBuildsCoordinator::applyCommitIndexBuild(OperationContext* opCtx,
|
||||||
|
|
||||||
// Get the builder.
|
// Get the builder.
|
||||||
swReplState = _getIndexBuild(buildUUID);
|
swReplState = _getIndexBuild(buildUUID);
|
||||||
|
restartingPausedIndexInStandaloneOrRestore = true;
|
||||||
}
|
}
|
||||||
auto replState = uassertStatusOK(swReplState);
|
auto replState = uassertStatusOK(swReplState);
|
||||||
replState->setMultikey(std::move(oplogEntry.multikey));
|
replState->setMultikey(std::move(oplogEntry.multikey));
|
||||||
|
|
@ -1333,7 +1353,22 @@ void IndexBuildsCoordinator::applyCommitIndexBuild(OperationContext* opCtx,
|
||||||
"buildUUID"_attr = buildUUID,
|
"buildUUID"_attr = buildUUID,
|
||||||
"waitResult"_attr = waitStatus,
|
"waitResult"_attr = waitStatus,
|
||||||
"status"_attr = buildStatus);
|
"status"_attr = buildStatus);
|
||||||
storeLastTimeBetweenCommitOplogAndCommit(*replState);
|
if (restartingPausedIndexInStandaloneOrRestore) {
|
||||||
|
storeLastTimeBetweenCommitOplogAndCommit(
|
||||||
|
*replState,
|
||||||
|
kLastTimeBetweenCommitOplogAndCommitMillisRestore,
|
||||||
|
indexBuildsSSS.lastTimeBetweenCommitOplogAndCommitMillisRestore);
|
||||||
|
} else if (InReplicationRecovery::isSet(opCtx->getServiceContext())) {
|
||||||
|
storeLastTimeBetweenCommitOplogAndCommit(
|
||||||
|
*replState,
|
||||||
|
kLastTimeBetweenCommitOplogAndCommitMillisStartupRecovery,
|
||||||
|
indexBuildsSSS.lastTimeBetweenCommitOplogAndCommitMillisStartupRecovery);
|
||||||
|
} else {
|
||||||
|
storeLastTimeBetweenCommitOplogAndCommit(
|
||||||
|
*replState,
|
||||||
|
kLastTimeBetweenCommitOplogAndCommitMillis,
|
||||||
|
indexBuildsSSS.lastTimeBetweenCommitOplogAndCommitMillis);
|
||||||
|
}
|
||||||
|
|
||||||
// Throws if there was an error building the index.
|
// Throws if there was an error building the index.
|
||||||
fut.get();
|
fut.get();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue