SERVER-113749 Add deprioritization stats to queryStats (#44816)

GitOrigin-RevId: 437a47a8c7494c366a6dd5b04d0ae14ccfd1254a
This commit is contained in:
Marcos Grillo 2025-12-15 20:32:08 +01:00 committed by MongoDB Bot
parent 50bac32127
commit 3bb367fcb1
15 changed files with 308 additions and 19 deletions

View File

@ -13,9 +13,10 @@
import {configureFailPoint} from "jstests/libs/fail_point_util.js"; import {configureFailPoint} from "jstests/libs/fail_point_util.js";
import {findMatchingLogLine} from "jstests/libs/log.js"; import {findMatchingLogLine} from "jstests/libs/log.js";
import {after, before, describe, it} from "jstests/libs/mochalite.js"; import {after, before, describe, it} from "jstests/libs/mochalite.js";
import {funWithArgs} from "jstests/libs/parallel_shell_helpers.js";
import {ReplSetTest} from "jstests/libs/replsettest.js";
import {Thread} from "jstests/libs/parallelTester.js"; import {Thread} from "jstests/libs/parallelTester.js";
import {funWithArgs} from "jstests/libs/parallel_shell_helpers.js";
import {getQueryExecMetrics, getQueryStats} from "jstests/libs/query/query_stats_utils.js";
import {ReplSetTest} from "jstests/libs/replsettest.js";
import { import {
getExecutionControlStats, getExecutionControlStats,
getTotalDeprioritizationCount, getTotalDeprioritizationCount,
@ -404,6 +405,58 @@ describe("Execution control statistics and observability", function () {
); );
}); });
}); });
describe("deprioritization reporting in query execution metrics", function () {
let replTest, mongod, db, coll;
const findComment = "deprioritized_find_test";
before(function () {
replTest = new ReplSetTest({
nodes: 1,
nodeOptions: {
setParameter: {
// Force the query to yield frequently to better expose the low-priority
// behavior.
internalQueryExecYieldIterations: 1,
executionControlDeprioritizationGate: true,
executionControlHeuristicNumAdmissionsDeprioritizeThreshold: 1,
logComponentVerbosity: {command: 2},
internalQueryStatsRateLimit: -1,
},
slowms: 0,
},
});
replTest.startSet();
replTest.initiate();
mongod = replTest.getPrimary();
db = mongod.getDB(jsTestName());
coll = db.coll;
insertTestDocuments(coll, 100);
});
after(function () {
replTest.stopSet();
});
// TODO SERVER-112150: Once all versions support QueryStatsMetricsSubsections feature flag, simply check metrics["queryExec"].
function verifyQueryStatsMetrics(metrics) {
let statsObj = metrics.hasOwnProperty("queryExec") ? metrics["queryExec"] : metrics;
assert.gte(statsObj.totalTimeQueuedMicros.sum, 0);
assert.gte(statsObj.totalAdmissions.sum, 0);
assert.gte(statsObj.wasLoadShed.false, 0);
assert.gte(statsObj.wasDeprioritized.false, 0);
}
it("should report deprioritization in query execution metrics", function () {
runDeprioritizedFind(coll, findComment + "_totalization");
const queryStats = getQueryStats(mongod, {collName: coll.getName()});
assert(
queryStats.length === 1,
"Expected to find exactly one query stats entry for '" + coll.getName() + "' " + tojson(queryStats),
);
verifyQueryStatsMetrics(queryStats[0].metrics);
});
});
describe("Delinquent operation statistics", function () { describe("Delinquent operation statistics", function () {
let replTest, mongod, db, coll; let replTest, mongod, db, coll;

View File

@ -50,6 +50,11 @@ function assertMetricsEqual(
assertMetricEqual(metrics, "totalAcquisitionDelinquencyMillis", 0); assertMetricEqual(metrics, "totalAcquisitionDelinquencyMillis", 0);
assertMetricEqual(metrics, "maxAcquisitionDelinquencyMillis", 0); assertMetricEqual(metrics, "maxAcquisitionDelinquencyMillis", 0);
assertMetricEqual(metrics, "overdueInterruptApproxMaxMillis", 0); assertMetricEqual(metrics, "overdueInterruptApproxMaxMillis", 0);
// The exact value of the following metrics cannot be determined beforehand, but we can assert their existence.
assertMetricEqual(metrics, "totalTimeQueuedMicros", undefined);
assertMetricEqual(metrics, "totalAdmissions", undefined);
assertMetricEqual(metrics, "wasLoadShed", undefined);
assertMetricEqual(metrics, "wasDeprioritized", undefined);
} }
function assertWriteMetricsEqual( function assertWriteMetricsEqual(

View File

@ -488,12 +488,13 @@ void CurOp::setEndOfOpMetrics(long long nreturned) {
metrics.cpuNanos = metrics.cpuNanos.value_or(Nanoseconds(0)) + _debug.cpuTime; metrics.cpuNanos = metrics.cpuNanos.value_or(Nanoseconds(0)) + _debug.cpuTime;
if (const auto& admCtx = ExecutionAdmissionContext::get(opCtx()); if (const auto& admCtx = ExecutionAdmissionContext::get(opCtx());
admCtx.getDelinquentAcquisitions() > 0 && !opCtx()->inMultiDocumentTransaction() && !opCtx()->inMultiDocumentTransaction() && !parent()) {
!parent()) { if (admCtx.getDelinquentAcquisitions() > 0) {
// Note that we don't record delinquency stats around ticketing when in a // Note that we don't record delinquency stats around ticketing when in a
// multi-document transaction, since operations within multi-document transactions hold // multi-document transaction, since operations within multi-document
// tickets for a long time by design and reporting them as delinquent will just create // transactions hold tickets for a long time by design and reporting them as
// noise in the data. // delinquent will just create noise in the data.
metrics.delinquentAcquisitions = metrics.delinquentAcquisitions.value_or(0) + metrics.delinquentAcquisitions = metrics.delinquentAcquisitions.value_or(0) +
static_cast<uint64_t>(admCtx.getDelinquentAcquisitions()); static_cast<uint64_t>(admCtx.getDelinquentAcquisitions());
@ -504,6 +505,20 @@ void CurOp::setEndOfOpMetrics(long long nreturned) {
std::max(metrics.maxAcquisitionDelinquency.value_or(Milliseconds(0)).count(), std::max(metrics.maxAcquisitionDelinquency.value_or(Milliseconds(0)).count(),
admCtx.getMaxAcquisitionDelinquencyMillis())}; admCtx.getMaxAcquisitionDelinquencyMillis())};
} }
// Only record execution control metrics for non exempted operations, that is, user
// operations, this is done to prevent having extra noise on the statistics comming from
// internal operations.
if (admCtx.getAdmissions() > 0 && admCtx.getExemptedAdmissions() == 0) {
metrics.totalTimeQueuedMicros =
metrics.totalTimeQueuedMicros.value_or(Microseconds(0)) +
admCtx.totalTimeQueuedMicros();
metrics.totalAdmissions = metrics.totalAdmissions.value_or(0) +
(admCtx.getAdmissions() - admCtx.getExemptedAdmissions());
metrics.wasLoadShed = metrics.wasLoadShed.value_or(false) || admCtx.getLoadShed();
metrics.wasDeprioritized =
metrics.wasDeprioritized.value_or(false) || admCtx.getLoadShed();
}
}
if (!parent()) { if (!parent()) {
metrics.numInterruptChecks = opCtx()->numInterruptChecks(); metrics.numInterruptChecks = opCtx()->numInterruptChecks();
@ -1166,6 +1181,12 @@ void CurOp::reportState(BSONObjBuilder* builder,
BSONObjBuilder sub(builder->subobjStart("delinquencyInfo")); BSONObjBuilder sub(builder->subobjStart("delinquencyInfo"));
OpDebug::appendDelinquentInfo(opCtx, sub); OpDebug::appendDelinquentInfo(opCtx, sub);
} }
if (admCtx.getAdmissions() > 0) {
builder->append("totalTimeQueuedMicros", admCtx.totalTimeQueuedMicros().count());
builder->append("totalAdmissions", admCtx.getAdmissions());
builder->append("wasLoadShed", admCtx.getLoadShed());
builder->append("wasDeprioritized", admCtx.getPriorityLowered());
}
} }
populateCurrentOpQueueStats(opCtx, _tickSource, builder); populateCurrentOpQueueStats(opCtx, _tickSource, builder);

View File

@ -105,6 +105,14 @@ TEST(CurOpTest, AddingAdditiveMetricsObjectsTogetherShouldAddFieldsTogether) {
additiveMetricsToAdd.totalAcquisitionDelinquency = Milliseconds{200}; additiveMetricsToAdd.totalAcquisitionDelinquency = Milliseconds{200};
currentAdditiveMetrics.maxAcquisitionDelinquency = Milliseconds{300}; currentAdditiveMetrics.maxAcquisitionDelinquency = Milliseconds{300};
additiveMetricsToAdd.maxAcquisitionDelinquency = Milliseconds{100}; additiveMetricsToAdd.maxAcquisitionDelinquency = Milliseconds{100};
currentAdditiveMetrics.totalTimeQueuedMicros = Microseconds{300};
additiveMetricsToAdd.totalTimeQueuedMicros = Microseconds{200};
currentAdditiveMetrics.totalAdmissions = 1;
additiveMetricsToAdd.totalAdmissions = 2;
currentAdditiveMetrics.wasLoadShed = false;
additiveMetricsToAdd.wasLoadShed = true;
currentAdditiveMetrics.wasDeprioritized = false;
additiveMetricsToAdd.wasDeprioritized = true;
currentAdditiveMetrics.numInterruptChecks = 1; currentAdditiveMetrics.numInterruptChecks = 1;
additiveMetricsToAdd.numInterruptChecks = 2; additiveMetricsToAdd.numInterruptChecks = 2;
currentAdditiveMetrics.overdueInterruptApproxMax = Milliseconds{100}; currentAdditiveMetrics.overdueInterruptApproxMax = Milliseconds{100};
@ -154,6 +162,15 @@ TEST(CurOpTest, AddingAdditiveMetricsObjectsTogetherShouldAddFieldsTogether) {
ASSERT_EQ(*currentAdditiveMetrics.maxAcquisitionDelinquency, ASSERT_EQ(*currentAdditiveMetrics.maxAcquisitionDelinquency,
std::max(*additiveMetricsBeforeAdd.maxAcquisitionDelinquency, std::max(*additiveMetricsBeforeAdd.maxAcquisitionDelinquency,
*additiveMetricsToAdd.maxAcquisitionDelinquency)); *additiveMetricsToAdd.maxAcquisitionDelinquency));
ASSERT_EQ(*currentAdditiveMetrics.totalTimeQueuedMicros,
*additiveMetricsBeforeAdd.totalTimeQueuedMicros +
*additiveMetricsToAdd.totalTimeQueuedMicros);
ASSERT_EQ(*currentAdditiveMetrics.totalAdmissions,
*additiveMetricsBeforeAdd.totalAdmissions + *additiveMetricsToAdd.totalAdmissions);
ASSERT_EQ(*currentAdditiveMetrics.wasLoadShed,
*additiveMetricsBeforeAdd.wasLoadShed || *additiveMetricsToAdd.wasLoadShed);
ASSERT_EQ(*currentAdditiveMetrics.wasDeprioritized,
*additiveMetricsBeforeAdd.wasDeprioritized || *additiveMetricsToAdd.wasDeprioritized);
ASSERT_EQ(*currentAdditiveMetrics.numInterruptChecks, ASSERT_EQ(*currentAdditiveMetrics.numInterruptChecks,
*additiveMetricsBeforeAdd.numInterruptChecks + *additiveMetricsBeforeAdd.numInterruptChecks +
*additiveMetricsToAdd.numInterruptChecks); *additiveMetricsToAdd.numInterruptChecks);
@ -181,6 +198,11 @@ TEST(CurOpTest, AddingUninitializedAdditiveMetricsFieldsShouldBeTreatedAsZero) {
additiveMetricsToAdd.delinquentAcquisitions = 1; additiveMetricsToAdd.delinquentAcquisitions = 1;
additiveMetricsToAdd.totalAcquisitionDelinquency = Milliseconds(100); additiveMetricsToAdd.totalAcquisitionDelinquency = Milliseconds(100);
additiveMetricsToAdd.maxAcquisitionDelinquency = Milliseconds(100); additiveMetricsToAdd.maxAcquisitionDelinquency = Milliseconds(100);
additiveMetricsToAdd.totalTimeQueuedMicros = Microseconds(100);
additiveMetricsToAdd.totalAdmissions = 1;
additiveMetricsToAdd.wasLoadShed = true;
additiveMetricsToAdd.wasDeprioritized = true;
additiveMetricsToAdd.maxAcquisitionDelinquency = Milliseconds(100);
additiveMetricsToAdd.numInterruptChecks = 1; additiveMetricsToAdd.numInterruptChecks = 1;
additiveMetricsToAdd.overdueInterruptApproxMax = Milliseconds(100); additiveMetricsToAdd.overdueInterruptApproxMax = Milliseconds(100);
@ -239,6 +261,14 @@ TEST(CurOpTest, AddingUninitializedAdditiveMetricsFieldsShouldBeTreatedAsZero) {
ASSERT_EQ(*currentAdditiveMetrics.numInterruptChecks, *additiveMetricsToAdd.numInterruptChecks); ASSERT_EQ(*currentAdditiveMetrics.numInterruptChecks, *additiveMetricsToAdd.numInterruptChecks);
ASSERT_EQ(*currentAdditiveMetrics.overdueInterruptApproxMax, ASSERT_EQ(*currentAdditiveMetrics.overdueInterruptApproxMax,
*additiveMetricsToAdd.overdueInterruptApproxMax); *additiveMetricsToAdd.overdueInterruptApproxMax);
// The execution control fields for the current AdditiveMetrics object were not initialized, so
// they should be treated as zero.
ASSERT_EQ(*currentAdditiveMetrics.totalTimeQueuedMicros,
*additiveMetricsToAdd.totalTimeQueuedMicros);
ASSERT_EQ(*currentAdditiveMetrics.totalAdmissions, *additiveMetricsToAdd.totalAdmissions);
ASSERT_EQ(*currentAdditiveMetrics.wasLoadShed, *additiveMetricsToAdd.wasLoadShed);
ASSERT_EQ(*currentAdditiveMetrics.wasDeprioritized, *additiveMetricsToAdd.wasDeprioritized);
} }
TEST(CurOpTest, AdditiveMetricsFieldsShouldIncrementByN) { TEST(CurOpTest, AdditiveMetricsFieldsShouldIncrementByN) {
@ -278,6 +308,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateCursorMetrics) {
additiveMetrics.delinquentAcquisitions = 2; additiveMetrics.delinquentAcquisitions = 2;
additiveMetrics.totalAcquisitionDelinquency = Milliseconds(400); additiveMetrics.totalAcquisitionDelinquency = Milliseconds(400);
additiveMetrics.maxAcquisitionDelinquency = Milliseconds(300); additiveMetrics.maxAcquisitionDelinquency = Milliseconds(300);
additiveMetrics.totalTimeQueuedMicros = Microseconds(400);
additiveMetrics.totalAdmissions = 2;
additiveMetrics.wasLoadShed = false;
additiveMetrics.wasDeprioritized = false;
additiveMetrics.numInterruptChecks = 2; additiveMetrics.numInterruptChecks = 2;
additiveMetrics.overdueInterruptApproxMax = Milliseconds(100); additiveMetrics.overdueInterruptApproxMax = Milliseconds(100);
additiveMetrics.nMatched = 1; additiveMetrics.nMatched = 1;
@ -286,7 +320,6 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateCursorMetrics) {
additiveMetrics.ndeleted = 0; additiveMetrics.ndeleted = 0;
additiveMetrics.ninserted = 0; additiveMetrics.ninserted = 0;
CursorMetrics cursorMetrics(3 /* keysExamined */, CursorMetrics cursorMetrics(3 /* keysExamined */,
4 /* docsExamined */, 4 /* docsExamined */,
10 /* bytesRead */, 10 /* bytesRead */,
@ -307,6 +340,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateCursorMetrics) {
cursorMetrics.setTotalAcquisitionDelinquencyMillis(400); cursorMetrics.setTotalAcquisitionDelinquencyMillis(400);
cursorMetrics.setMaxAcquisitionDelinquencyMillis(200); cursorMetrics.setMaxAcquisitionDelinquencyMillis(200);
cursorMetrics.setOverdueInterruptApproxMaxMillis(200); cursorMetrics.setOverdueInterruptApproxMaxMillis(200);
cursorMetrics.setTotalTimeQueuedMicros(400);
cursorMetrics.setTotalAdmissions(5);
cursorMetrics.setWasLoadShed(true);
cursorMetrics.setWasDeprioritized(true);
additiveMetrics.aggregateCursorMetrics(cursorMetrics); additiveMetrics.aggregateCursorMetrics(cursorMetrics);
@ -328,6 +365,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateCursorMetrics) {
ASSERT_EQ(*additiveMetrics.nModified, 2); ASSERT_EQ(*additiveMetrics.nModified, 2);
ASSERT_EQ(*additiveMetrics.ndeleted, 0); ASSERT_EQ(*additiveMetrics.ndeleted, 0);
ASSERT_EQ(*additiveMetrics.ninserted, 0); ASSERT_EQ(*additiveMetrics.ninserted, 0);
ASSERT_EQ(*additiveMetrics.totalTimeQueuedMicros, Microseconds(800));
ASSERT_EQ(*additiveMetrics.totalAdmissions, 7);
ASSERT_EQ(*additiveMetrics.wasLoadShed, true);
ASSERT_EQ(*additiveMetrics.wasDeprioritized, true);
} }
TEST(CurOpTest, AdditiveMetricsShouldAggregateNegativeCpuNanos) { TEST(CurOpTest, AdditiveMetricsShouldAggregateNegativeCpuNanos) {
@ -400,6 +441,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateDataBearingNodeMetrics) {
additiveMetrics.delinquentAcquisitions = 2; additiveMetrics.delinquentAcquisitions = 2;
additiveMetrics.totalAcquisitionDelinquency = Milliseconds(400); additiveMetrics.totalAcquisitionDelinquency = Milliseconds(400);
additiveMetrics.maxAcquisitionDelinquency = Milliseconds(200); additiveMetrics.maxAcquisitionDelinquency = Milliseconds(200);
additiveMetrics.totalTimeQueuedMicros = Microseconds(400);
additiveMetrics.totalAdmissions = 3;
additiveMetrics.wasLoadShed = true;
additiveMetrics.wasDeprioritized = true;
additiveMetrics.numInterruptChecks = 2; additiveMetrics.numInterruptChecks = 2;
additiveMetrics.overdueInterruptApproxMax = Milliseconds(100); additiveMetrics.overdueInterruptApproxMax = Milliseconds(100);
@ -413,6 +458,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateDataBearingNodeMetrics) {
remoteMetrics.delinquentAcquisitions = 1; remoteMetrics.delinquentAcquisitions = 1;
remoteMetrics.totalAcquisitionDelinquency = Milliseconds(300); remoteMetrics.totalAcquisitionDelinquency = Milliseconds(300);
remoteMetrics.maxAcquisitionDelinquency = Milliseconds(300); remoteMetrics.maxAcquisitionDelinquency = Milliseconds(300);
remoteMetrics.totalTimeQueuedMicros = Microseconds(300);
remoteMetrics.totalAdmissions = 2;
remoteMetrics.wasLoadShed = false;
remoteMetrics.wasDeprioritized = false;
remoteMetrics.numInterruptChecks = 1; remoteMetrics.numInterruptChecks = 1;
remoteMetrics.overdueInterruptApproxMax = Milliseconds(300); remoteMetrics.overdueInterruptApproxMax = Milliseconds(300);
@ -427,6 +476,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateDataBearingNodeMetrics) {
ASSERT_EQ(*additiveMetrics.delinquentAcquisitions, 3); ASSERT_EQ(*additiveMetrics.delinquentAcquisitions, 3);
ASSERT_EQ(*additiveMetrics.totalAcquisitionDelinquency, Milliseconds(700)); ASSERT_EQ(*additiveMetrics.totalAcquisitionDelinquency, Milliseconds(700));
ASSERT_EQ(*additiveMetrics.maxAcquisitionDelinquency, Milliseconds(300)); ASSERT_EQ(*additiveMetrics.maxAcquisitionDelinquency, Milliseconds(300));
ASSERT_EQ(*additiveMetrics.totalTimeQueuedMicros, Microseconds(700));
ASSERT_EQ(*additiveMetrics.totalAdmissions, 5);
ASSERT_EQ(*additiveMetrics.wasLoadShed, true);
ASSERT_EQ(*additiveMetrics.wasDeprioritized, true);
ASSERT_EQ(*additiveMetrics.numInterruptChecks, 3); ASSERT_EQ(*additiveMetrics.numInterruptChecks, 3);
ASSERT_EQ(*additiveMetrics.overdueInterruptApproxMax, Milliseconds(300)); ASSERT_EQ(*additiveMetrics.overdueInterruptApproxMax, Milliseconds(300));
} }
@ -696,6 +749,41 @@ TEST(CurOpTest, ReportStateIncludesDelinquentStatsIfNonZero) {
} }
} }
TEST(CurOpTest, ReportDeprioritizationStats) {
QueryTestServiceContext serviceContext;
auto opCtx = serviceContext.makeOperationContext();
auto curOp = CurOp::get(*opCtx);
curOp->ensureStarted();
// 1. Check the default state. If they're zero, they're not reported.
{
BSONObjBuilder bob;
curOp->reportState(&bob, SerializationContext{});
BSONObj state = bob.obj();
ASSERT_FALSE(state.hasField("totalTimeQueuedMicros"));
ASSERT_FALSE(state.hasField("totalAdmissions"));
ASSERT_FALSE(state.hasField("wasLoadShed"));
ASSERT_FALSE(state.hasField("wasDeprioritized"));
}
// 2. If there are admissions then report the stats.
{
ExecutionAdmissionContext::get(opCtx.get()).setAdmission_forTest(1);
ExecutionAdmissionContext::get(opCtx.get()).setTotalTimeQueuedMicros_forTest(100);
BSONObjBuilder bob;
curOp->reportState(&bob, SerializationContext{});
BSONObj state = bob.obj();
ASSERT_TRUE(state.hasField("totalAdmissions"));
ASSERT_EQ(state["totalAdmissions"].Number(), 1);
ASSERT_TRUE(state.hasField("totalTimeQueuedMicros"));
ASSERT_EQ(state["totalTimeQueuedMicros"].Number(), 100);
ASSERT_TRUE(state.hasField("wasLoadShed"));
ASSERT_FALSE(state["wasLoadShed"].Bool());
ASSERT_TRUE(state.hasField("wasDeprioritized"));
ASSERT_FALSE(state["wasDeprioritized"].Bool());
}
}
TEST(CurOpTest, ReportStateIncludesPriorityLowered) { TEST(CurOpTest, ReportStateIncludesPriorityLowered) {
QueryTestServiceContext serviceContext; QueryTestServiceContext serviceContext;
auto opCtx = serviceContext.makeOperationContext(); auto opCtx = serviceContext.makeOperationContext();

View File

@ -1335,6 +1335,12 @@ CursorMetrics OpDebug::getCursorMetrics() const {
metrics.setMaxAcquisitionDelinquencyMillis( metrics.setMaxAcquisitionDelinquencyMillis(
additiveMetrics.maxAcquisitionDelinquency.value_or(Milliseconds(0)).count()); additiveMetrics.maxAcquisitionDelinquency.value_or(Milliseconds(0)).count());
metrics.setTotalTimeQueuedMicros(
additiveMetrics.totalTimeQueuedMicros.value_or(Microseconds(0)).count());
metrics.setTotalAdmissions(additiveMetrics.totalAdmissions.value_or(0));
metrics.setWasLoadShed(additiveMetrics.wasLoadShed.value_or(false));
metrics.setWasDeprioritized(additiveMetrics.wasDeprioritized.value_or(false));
metrics.setNumInterruptChecks(additiveMetrics.numInterruptChecks.value_or(0)); metrics.setNumInterruptChecks(additiveMetrics.numInterruptChecks.value_or(0));
metrics.setOverdueInterruptApproxMaxMillis( metrics.setOverdueInterruptApproxMaxMillis(
additiveMetrics.overdueInterruptApproxMax.value_or(Milliseconds(0)).count()); additiveMetrics.overdueInterruptApproxMax.value_or(Milliseconds(0)).count());
@ -1481,6 +1487,10 @@ void OpDebug::AdditiveMetrics::add(const AdditiveMetrics& otherMetrics) {
maxAcquisitionDelinquency = std::max(maxAcquisitionDelinquency.value_or(Milliseconds(0)), maxAcquisitionDelinquency = std::max(maxAcquisitionDelinquency.value_or(Milliseconds(0)),
*otherMetrics.maxAcquisitionDelinquency); *otherMetrics.maxAcquisitionDelinquency);
} }
totalTimeQueuedMicros = addOptionals(totalTimeQueuedMicros, otherMetrics.totalTimeQueuedMicros);
totalAdmissions = addOptionals(totalAdmissions, otherMetrics.totalAdmissions);
wasLoadShed = addOptionals(wasLoadShed, otherMetrics.wasLoadShed);
wasDeprioritized = addOptionals(wasDeprioritized, otherMetrics.wasDeprioritized);
hasSortStage = hasSortStage || otherMetrics.hasSortStage; hasSortStage = hasSortStage || otherMetrics.hasSortStage;
usedDisk = usedDisk || otherMetrics.usedDisk; usedDisk = usedDisk || otherMetrics.usedDisk;
@ -1514,6 +1524,12 @@ void OpDebug::AdditiveMetrics::aggregateDataBearingNodeMetrics(
maxAcquisitionDelinquency = std::max(maxAcquisitionDelinquency.value_or(Milliseconds(0)), maxAcquisitionDelinquency = std::max(maxAcquisitionDelinquency.value_or(Milliseconds(0)),
metrics.maxAcquisitionDelinquency); metrics.maxAcquisitionDelinquency);
totalTimeQueuedMicros =
totalTimeQueuedMicros.value_or(Microseconds(0)) + metrics.totalTimeQueuedMicros;
totalAdmissions = totalAdmissions.value_or(0) + metrics.totalAdmissions;
wasLoadShed = wasLoadShed.value_or(false) || metrics.wasLoadShed;
wasDeprioritized = wasDeprioritized.value_or(false) || metrics.wasDeprioritized;
numInterruptChecks = numInterruptChecks.value_or(0) + metrics.numInterruptChecks; numInterruptChecks = numInterruptChecks.value_or(0) + metrics.numInterruptChecks;
overdueInterruptApproxMax = std::max(overdueInterruptApproxMax.value_or(Milliseconds(0)), overdueInterruptApproxMax = std::max(overdueInterruptApproxMax.value_or(Milliseconds(0)),
metrics.overdueInterruptApproxMax); metrics.overdueInterruptApproxMax);
@ -1558,7 +1574,11 @@ void OpDebug::AdditiveMetrics::aggregateCursorMetrics(const CursorMetrics& metri
static_cast<uint64_t>(metrics.getNUpserted()), static_cast<uint64_t>(metrics.getNUpserted()),
static_cast<uint64_t>(metrics.getNModified()), static_cast<uint64_t>(metrics.getNModified()),
static_cast<uint64_t>(metrics.getNDeleted()), static_cast<uint64_t>(metrics.getNDeleted()),
static_cast<uint64_t>(metrics.getNInserted())}); static_cast<uint64_t>(metrics.getNInserted()),
Microseconds(metrics.getTotalTimeQueuedMicros()),
static_cast<uint64_t>(metrics.getTotalAdmissions()),
metrics.getWasLoadShed(),
metrics.getWasDeprioritized()});
} }
void OpDebug::AdditiveMetrics::aggregateStorageStats(const StorageStats& stats) { void OpDebug::AdditiveMetrics::aggregateStorageStats(const StorageStats& stats) {

View File

@ -230,6 +230,13 @@ public:
boost::optional<uint64_t> numInterruptChecks; boost::optional<uint64_t> numInterruptChecks;
boost::optional<Milliseconds> overdueInterruptApproxMax; boost::optional<Milliseconds> overdueInterruptApproxMax;
// If query stats are being collected for this operation, stores the execution control
// statistics.
boost::optional<Microseconds> totalTimeQueuedMicros;
boost::optional<uint64_t> totalAdmissions;
boost::optional<bool> wasLoadShed;
boost::optional<bool> wasDeprioritized;
}; };
MONGO_MOD_PRIVATE OpDebug() = default; MONGO_MOD_PRIVATE OpDebug() = default;

View File

@ -135,6 +135,26 @@ structs:
description: "The number of documents inserted (excluding upserts)." description: "The number of documents inserted (excluding upserts)."
type: long type: long
stability: unstable stability: unstable
totalTimeQueuedMicros:
description: "Total time spent waiting for a ticket."
type: long
stability: unstable
default: 0
totalAdmissions:
description: "Total number of admissions during the operation."
type: long
stability: unstable
default: 0
wasLoadShed:
description: "If true, the operation was load shed."
type: bool
stability: unstable
default: false
wasDeprioritized:
description: "If true, the operation was deprioritized at some point."
type: bool
stability: unstable
default: false
ResponseCursorBase: ResponseCursorBase:
description: "Common fields of initial and subsequent cursor responses." description: "Common fields of initial and subsequent cursor responses."

View File

@ -63,6 +63,10 @@ static const BSONObj basicMetricsObj = fromjson(R"({
delinquentAcquisitions: {"$numberLong": "0"}, delinquentAcquisitions: {"$numberLong": "0"},
totalAcquisitionDelinquencyMillis: {"$numberLong": "0"}, totalAcquisitionDelinquencyMillis: {"$numberLong": "0"},
maxAcquisitionDelinquencyMillis: {"$numberLong": "0"}, maxAcquisitionDelinquencyMillis: {"$numberLong": "0"},
totalTimeQueuedMicros: {"$numberLong": "0"},
totalAdmissions: {"$numberLong": "0"},
wasLoadShed: false,
wasDeprioritized: false,
numInterruptChecks: {"$numberLong": "0"}, numInterruptChecks: {"$numberLong": "0"},
overdueInterruptApproxMaxMillis: {"$numberLong": "0"}, overdueInterruptApproxMaxMillis: {"$numberLong": "0"},
nMatched: {"$numberLong": "0"}, nMatched: {"$numberLong": "0"},
@ -323,6 +327,10 @@ TEST(CursorResponseTest, parseFromBSONCursorMetrics) {
ASSERT_EQ(metrics.getDelinquentAcquisitions(), 0); ASSERT_EQ(metrics.getDelinquentAcquisitions(), 0);
ASSERT_EQ(metrics.getTotalAcquisitionDelinquencyMillis(), 0); ASSERT_EQ(metrics.getTotalAcquisitionDelinquencyMillis(), 0);
ASSERT_EQ(metrics.getMaxAcquisitionDelinquencyMillis(), 0); ASSERT_EQ(metrics.getMaxAcquisitionDelinquencyMillis(), 0);
ASSERT_EQ(metrics.getTotalTimeQueuedMicros(), 0);
ASSERT_EQ(metrics.getTotalAdmissions(), 0);
ASSERT_FALSE(metrics.getWasLoadShed());
ASSERT_FALSE(metrics.getWasDeprioritized());
ASSERT_EQ(metrics.getNumInterruptChecks(), 0); ASSERT_EQ(metrics.getNumInterruptChecks(), 0);
ASSERT_EQ(metrics.getOverdueInterruptApproxMaxMillis(), 0); ASSERT_EQ(metrics.getOverdueInterruptApproxMaxMillis(), 0);
ASSERT_EQ(metrics.getNMatched(), 0); ASSERT_EQ(metrics.getNMatched(), 0);
@ -991,6 +999,10 @@ TEST_F(CursorResponseBuilderTest, buildResponseWithAllKnownFields) {
ASSERT_EQ(parsedMetrics->getDelinquentAcquisitions(), 0); ASSERT_EQ(parsedMetrics->getDelinquentAcquisitions(), 0);
ASSERT_EQ(parsedMetrics->getTotalAcquisitionDelinquencyMillis(), 0); ASSERT_EQ(parsedMetrics->getTotalAcquisitionDelinquencyMillis(), 0);
ASSERT_EQ(parsedMetrics->getMaxAcquisitionDelinquencyMillis(), 0); ASSERT_EQ(parsedMetrics->getMaxAcquisitionDelinquencyMillis(), 0);
ASSERT_EQ(parsedMetrics->getTotalTimeQueuedMicros(), 0);
ASSERT_EQ(parsedMetrics->getTotalAdmissions(), 0);
ASSERT_FALSE(parsedMetrics->getWasLoadShed());
ASSERT_FALSE(parsedMetrics->getWasDeprioritized());
ASSERT_EQ(parsedMetrics->getNumInterruptChecks(), 15); ASSERT_EQ(parsedMetrics->getNumInterruptChecks(), 15);
ASSERT_EQ(parsedMetrics->getOverdueInterruptApproxMaxMillis(), 0); ASSERT_EQ(parsedMetrics->getOverdueInterruptApproxMaxMillis(), 0);
ASSERT_EQ(parsedMetrics->getNMatched(), 1); ASSERT_EQ(parsedMetrics->getNMatched(), 1);

View File

@ -69,6 +69,11 @@ struct MONGO_MOD_PUB DataBearingNodeMetrics {
uint64_t nDeleted = 0; uint64_t nDeleted = 0;
uint64_t nInserted = 0; uint64_t nInserted = 0;
Microseconds totalTimeQueuedMicros{0};
uint64_t totalAdmissions = 0;
bool wasLoadShed = false;
bool wasDeprioritized = false;
/** /**
* Adds the fields from the given object into the fields of this object using addition (in the * Adds the fields from the given object into the fields of this object using addition (in the
* case of numeric metrics) or conjunction/disjunction (in the case of boolean metrics). * case of numeric metrics) or conjunction/disjunction (in the case of boolean metrics).
@ -96,6 +101,10 @@ struct MONGO_MOD_PUB DataBearingNodeMetrics {
nModified += other.nModified; nModified += other.nModified;
nDeleted += other.nDeleted; nDeleted += other.nDeleted;
nInserted += other.nInserted; nInserted += other.nInserted;
totalTimeQueuedMicros += other.totalTimeQueuedMicros;
totalAdmissions += other.totalAdmissions;
wasLoadShed = wasLoadShed || other.wasLoadShed;
wasDeprioritized = wasDeprioritized || other.wasDeprioritized;
} }
void add(const boost::optional<DataBearingNodeMetrics>& other) { void add(const boost::optional<DataBearingNodeMetrics>& other) {
@ -131,6 +140,10 @@ struct MONGO_MOD_PUB DataBearingNodeMetrics {
nModified += metrics.getNModified(); nModified += metrics.getNModified();
nDeleted += metrics.getNDeleted(); nDeleted += metrics.getNDeleted();
nInserted += metrics.getNInserted(); nInserted += metrics.getNInserted();
totalTimeQueuedMicros += Microseconds(metrics.getTotalTimeQueuedMicros());
totalAdmissions += metrics.getTotalAdmissions();
wasLoadShed = wasLoadShed || metrics.getWasLoadShed();
wasDeprioritized = wasDeprioritized || metrics.getWasDeprioritized();
} }
}; };

View File

@ -269,6 +269,11 @@ void updateQueryExecStatistics(QueryExecEntry& queryExecEntryToUpdate,
queryExecEntryToUpdate.maxAcquisitionDelinquencyMillis.aggregate( queryExecEntryToUpdate.maxAcquisitionDelinquencyMillis.aggregate(
snapshot.maxAcquisitionDelinquencyMillis); snapshot.maxAcquisitionDelinquencyMillis);
queryExecEntryToUpdate.totalTimeQueuedMicros.aggregate(snapshot.totalTimeQueuedMicros);
queryExecEntryToUpdate.totalAdmissions.aggregate(snapshot.totalAdmissions);
queryExecEntryToUpdate.wasLoadShed.aggregate(snapshot.wasLoadShed);
queryExecEntryToUpdate.wasDeprioritized.aggregate(snapshot.wasDeprioritized);
// Store the number of interrupt checks per second as a rate, this can give us a better sense of // Store the number of interrupt checks per second as a rate, this can give us a better sense of
// how often interrupts are being checked relative to the total execution time across multiple // how often interrupts are being checked relative to the total execution time across multiple
// query runs. // query runs.
@ -503,7 +508,10 @@ QueryStatsSnapshot captureMetrics(const OperationContext* opCtx,
static_cast<uint64_t>(metrics.nModified.value_or(0)), static_cast<uint64_t>(metrics.nModified.value_or(0)),
static_cast<uint64_t>(metrics.ndeleted.value_or(0)), static_cast<uint64_t>(metrics.ndeleted.value_or(0)),
static_cast<uint64_t>(metrics.ninserted.value_or(0)), static_cast<uint64_t>(metrics.ninserted.value_or(0)),
}; metrics.totalTimeQueuedMicros.value_or(Microseconds(0)).count(),
static_cast<uint64_t>(metrics.totalAdmissions.value_or(0)),
metrics.wasLoadShed.value_or(false),
metrics.wasDeprioritized.value_or(false)};
return snapshot; return snapshot;
} }

View File

@ -253,6 +253,11 @@ struct QueryStatsSnapshot {
uint64_t nModified; uint64_t nModified;
uint64_t nDeleted; uint64_t nDeleted;
uint64_t nInserted; uint64_t nInserted;
int64_t totalTimeQueuedMicros;
uint64_t totalAdmissions;
bool wasLoadShed;
bool wasDeprioritized;
}; };
/** /**

View File

@ -93,6 +93,11 @@ void QueryExecEntry::toBSON(BSONObjBuilder& queryStatsBuilder, bool buildAsSubse
totalAcquisitionDelinquencyMillis.appendTo(*builder, "totalAcquisitionDelinquencyMillis"); totalAcquisitionDelinquencyMillis.appendTo(*builder, "totalAcquisitionDelinquencyMillis");
maxAcquisitionDelinquencyMillis.appendTo(*builder, "maxAcquisitionDelinquencyMillis"); maxAcquisitionDelinquencyMillis.appendTo(*builder, "maxAcquisitionDelinquencyMillis");
totalTimeQueuedMicros.appendTo(*builder, "totalTimeQueuedMicros");
totalAdmissions.appendTo(*builder, "totalAdmissions");
wasLoadShed.appendTo(*builder, "wasLoadShed");
wasDeprioritized.appendTo(*builder, "wasDeprioritized");
numInterruptChecksPerSec.appendTo(*builder, "numInterruptChecksPerSec"); numInterruptChecksPerSec.appendTo(*builder, "numInterruptChecksPerSec");
overdueInterruptApproxMaxMillis.appendTo(*builder, "overdueInterruptApproxMaxMillis"); overdueInterruptApproxMaxMillis.appendTo(*builder, "overdueInterruptApproxMaxMillis");

View File

@ -85,6 +85,14 @@ struct QueryExecEntry {
AggregatedMetric<int64_t> totalAcquisitionDelinquencyMillis; AggregatedMetric<int64_t> totalAcquisitionDelinquencyMillis;
AggregatedMetric<int64_t> maxAcquisitionDelinquencyMillis; AggregatedMetric<int64_t> maxAcquisitionDelinquencyMillis;
/**
* Aggregates the execution control stats.
*/
AggregatedMetric<int64_t> totalTimeQueuedMicros;
AggregatedMetric<uint64_t> totalAdmissions;
AggregatedBool wasLoadShed;
AggregatedBool wasDeprioritized;
/** /**
* Aggregates the checkForInterrupt stats including getMore requests. * Aggregates the checkForInterrupt stats including getMore requests.
*/ */

View File

@ -1570,6 +1570,10 @@ void verifyQueryStatsBSON(QueryStatsEntry& qse, const QueryStatsBSONParams& para
.append("delinquentAcquisitions", emptyIntMetric) .append("delinquentAcquisitions", emptyIntMetric)
.append("totalAcquisitionDelinquencyMillis", emptyIntMetric) .append("totalAcquisitionDelinquencyMillis", emptyIntMetric)
.append("maxAcquisitionDelinquencyMillis", emptyIntMetric) .append("maxAcquisitionDelinquencyMillis", emptyIntMetric)
.append("totalTimeQueuedMicros", emptyIntMetric)
.append("totalAdmissions", emptyIntMetric)
.append("wasLoadShed", boolMetricBson(0, 0))
.append("wasDeprioritized", boolMetricBson(0, 0))
.append("numInterruptChecksPerSec", emptyIntMetric) .append("numInterruptChecksPerSec", emptyIntMetric)
.append("overdueInterruptApproxMaxMillis", emptyIntMetric); .append("overdueInterruptApproxMaxMillis", emptyIntMetric);

View File

@ -3590,6 +3590,10 @@ TEST_F(AsyncResultsMergerTest, RemoteMetricsAggregatedLocally) {
metrics.setDelinquentAcquisitions(3); metrics.setDelinquentAcquisitions(3);
metrics.setTotalAcquisitionDelinquencyMillis(100); metrics.setTotalAcquisitionDelinquencyMillis(100);
metrics.setMaxAcquisitionDelinquencyMillis(80); metrics.setMaxAcquisitionDelinquencyMillis(80);
metrics.setTotalTimeQueuedMicros(400);
metrics.setTotalAdmissions(5);
metrics.setWasLoadShed(false);
metrics.setWasDeprioritized(false);
metrics.setOverdueInterruptApproxMaxMillis(100); metrics.setOverdueInterruptApproxMaxMillis(100);
scheduleResponse(id, {fromjson("{_id: 1}")}, std::move(metrics)); scheduleResponse(id, {fromjson("{_id: 1}")}, std::move(metrics));
} }
@ -3616,6 +3620,10 @@ TEST_F(AsyncResultsMergerTest, RemoteMetricsAggregatedLocally) {
ASSERT_EQ(remoteMetrics.delinquentAcquisitions, 3); ASSERT_EQ(remoteMetrics.delinquentAcquisitions, 3);
ASSERT_EQ(remoteMetrics.totalAcquisitionDelinquency, Milliseconds(100)); ASSERT_EQ(remoteMetrics.totalAcquisitionDelinquency, Milliseconds(100));
ASSERT_EQ(remoteMetrics.maxAcquisitionDelinquency, Milliseconds(80)); ASSERT_EQ(remoteMetrics.maxAcquisitionDelinquency, Milliseconds(80));
ASSERT_EQ(remoteMetrics.totalTimeQueuedMicros, Microseconds(400));
ASSERT_EQ(remoteMetrics.totalAdmissions, 5);
ASSERT_FALSE(remoteMetrics.wasLoadShed);
ASSERT_FALSE(remoteMetrics.wasDeprioritized);
ASSERT_EQ(remoteMetrics.numInterruptChecks, 3); ASSERT_EQ(remoteMetrics.numInterruptChecks, 3);
ASSERT_EQ(remoteMetrics.overdueInterruptApproxMax, Milliseconds(100)); ASSERT_EQ(remoteMetrics.overdueInterruptApproxMax, Milliseconds(100));
ASSERT_EQ(remoteMetrics.nMatched, 1); ASSERT_EQ(remoteMetrics.nMatched, 1);
@ -3646,6 +3654,10 @@ TEST_F(AsyncResultsMergerTest, RemoteMetricsAggregatedLocally) {
metrics.setDelinquentAcquisitions(2); metrics.setDelinquentAcquisitions(2);
metrics.setTotalAcquisitionDelinquencyMillis(150); metrics.setTotalAcquisitionDelinquencyMillis(150);
metrics.setMaxAcquisitionDelinquencyMillis(120); metrics.setMaxAcquisitionDelinquencyMillis(120);
metrics.setTotalTimeQueuedMicros(200);
metrics.setTotalAdmissions(6);
metrics.setWasLoadShed(true);
metrics.setWasDeprioritized(true);
metrics.setOverdueInterruptApproxMaxMillis(200); metrics.setOverdueInterruptApproxMaxMillis(200);
scheduleResponse(CursorId(0), {fromjson("{_id: 2}")}, std::move(metrics)); scheduleResponse(CursorId(0), {fromjson("{_id: 2}")}, std::move(metrics));
} }
@ -3670,6 +3682,10 @@ TEST_F(AsyncResultsMergerTest, RemoteMetricsAggregatedLocally) {
ASSERT_EQ(remoteMetrics.delinquentAcquisitions, 5); ASSERT_EQ(remoteMetrics.delinquentAcquisitions, 5);
ASSERT_EQ(remoteMetrics.totalAcquisitionDelinquency, Milliseconds(250)); ASSERT_EQ(remoteMetrics.totalAcquisitionDelinquency, Milliseconds(250));
ASSERT_EQ(remoteMetrics.maxAcquisitionDelinquency, Milliseconds(120)); ASSERT_EQ(remoteMetrics.maxAcquisitionDelinquency, Milliseconds(120));
ASSERT_EQ(remoteMetrics.totalTimeQueuedMicros, Microseconds(600));
ASSERT_EQ(remoteMetrics.totalAdmissions, 11);
ASSERT_TRUE(remoteMetrics.wasLoadShed);
ASSERT_TRUE(remoteMetrics.wasDeprioritized);
ASSERT_EQ(remoteMetrics.numInterruptChecks, 5); ASSERT_EQ(remoteMetrics.numInterruptChecks, 5);
ASSERT_EQ(remoteMetrics.overdueInterruptApproxMax, Milliseconds(200)); ASSERT_EQ(remoteMetrics.overdueInterruptApproxMax, Milliseconds(200));
ASSERT_EQ(remoteMetrics.nMatched, 3); ASSERT_EQ(remoteMetrics.nMatched, 3);
@ -3693,6 +3709,10 @@ TEST_F(AsyncResultsMergerTest, RemoteMetricsAggregatedLocally) {
ASSERT_EQ(remoteMetrics.delinquentAcquisitions, 0); ASSERT_EQ(remoteMetrics.delinquentAcquisitions, 0);
ASSERT_EQ(remoteMetrics.totalAcquisitionDelinquency, Milliseconds(0)); ASSERT_EQ(remoteMetrics.totalAcquisitionDelinquency, Milliseconds(0));
ASSERT_EQ(remoteMetrics.maxAcquisitionDelinquency, Milliseconds(0)); ASSERT_EQ(remoteMetrics.maxAcquisitionDelinquency, Milliseconds(0));
ASSERT_EQ(remoteMetrics.totalTimeQueuedMicros, Microseconds(0));
ASSERT_EQ(remoteMetrics.totalAdmissions, 0);
ASSERT_FALSE(remoteMetrics.wasLoadShed);
ASSERT_FALSE(remoteMetrics.wasDeprioritized);
ASSERT_EQ(remoteMetrics.numInterruptChecks, 0); ASSERT_EQ(remoteMetrics.numInterruptChecks, 0);
ASSERT_EQ(remoteMetrics.overdueInterruptApproxMax, Milliseconds(0)); ASSERT_EQ(remoteMetrics.overdueInterruptApproxMax, Milliseconds(0));
ASSERT_EQ(remoteMetrics.nMatched, 0); ASSERT_EQ(remoteMetrics.nMatched, 0);