diff --git a/jstests/noPassthrough/admission/execution_control/deprioritization_stats_and_observability.js b/jstests/noPassthrough/admission/execution_control/deprioritization_stats_and_observability.js index 18aabebb932..103fe767bee 100644 --- a/jstests/noPassthrough/admission/execution_control/deprioritization_stats_and_observability.js +++ b/jstests/noPassthrough/admission/execution_control/deprioritization_stats_and_observability.js @@ -13,9 +13,10 @@ import {configureFailPoint} from "jstests/libs/fail_point_util.js"; import {findMatchingLogLine} from "jstests/libs/log.js"; import {after, before, describe, it} from "jstests/libs/mochalite.js"; -import {funWithArgs} from "jstests/libs/parallel_shell_helpers.js"; -import {ReplSetTest} from "jstests/libs/replsettest.js"; import {Thread} from "jstests/libs/parallelTester.js"; +import {funWithArgs} from "jstests/libs/parallel_shell_helpers.js"; +import {getQueryExecMetrics, getQueryStats} from "jstests/libs/query/query_stats_utils.js"; +import {ReplSetTest} from "jstests/libs/replsettest.js"; import { getExecutionControlStats, getTotalDeprioritizationCount, @@ -404,6 +405,58 @@ describe("Execution control statistics and observability", function () { ); }); }); + describe("deprioritization reporting in query execution metrics", function () { + let replTest, mongod, db, coll; + const findComment = "deprioritized_find_test"; + + before(function () { + replTest = new ReplSetTest({ + nodes: 1, + nodeOptions: { + setParameter: { + // Force the query to yield frequently to better expose the low-priority + // behavior. + internalQueryExecYieldIterations: 1, + executionControlDeprioritizationGate: true, + executionControlHeuristicNumAdmissionsDeprioritizeThreshold: 1, + logComponentVerbosity: {command: 2}, + internalQueryStatsRateLimit: -1, + }, + slowms: 0, + }, + }); + replTest.startSet(); + replTest.initiate(); + mongod = replTest.getPrimary(); + db = mongod.getDB(jsTestName()); + coll = db.coll; + + insertTestDocuments(coll, 100); + }); + + after(function () { + replTest.stopSet(); + }); + + // TODO SERVER-112150: Once all versions support QueryStatsMetricsSubsections feature flag, simply check metrics["queryExec"]. + function verifyQueryStatsMetrics(metrics) { + let statsObj = metrics.hasOwnProperty("queryExec") ? metrics["queryExec"] : metrics; + assert.gte(statsObj.totalTimeQueuedMicros.sum, 0); + assert.gte(statsObj.totalAdmissions.sum, 0); + assert.gte(statsObj.wasLoadShed.false, 0); + assert.gte(statsObj.wasDeprioritized.false, 0); + } + + it("should report deprioritization in query execution metrics", function () { + runDeprioritizedFind(coll, findComment + "_totalization"); + const queryStats = getQueryStats(mongod, {collName: coll.getName()}); + assert( + queryStats.length === 1, + "Expected to find exactly one query stats entry for '" + coll.getName() + "' " + tojson(queryStats), + ); + verifyQueryStatsMetrics(queryStats[0].metrics); + }); + }); describe("Delinquent operation statistics", function () { let replTest, mongod, db, coll; diff --git a/jstests/noPassthroughWithMongod/query/include_query_stats_metrics.js b/jstests/noPassthroughWithMongod/query/include_query_stats_metrics.js index 86348a9cd4d..5faca2ed372 100644 --- a/jstests/noPassthroughWithMongod/query/include_query_stats_metrics.js +++ b/jstests/noPassthroughWithMongod/query/include_query_stats_metrics.js @@ -50,6 +50,11 @@ function assertMetricsEqual( assertMetricEqual(metrics, "totalAcquisitionDelinquencyMillis", 0); assertMetricEqual(metrics, "maxAcquisitionDelinquencyMillis", 0); assertMetricEqual(metrics, "overdueInterruptApproxMaxMillis", 0); + // The exact value of the following metrics cannot be determined beforehand, but we can assert their existence. + assertMetricEqual(metrics, "totalTimeQueuedMicros", undefined); + assertMetricEqual(metrics, "totalAdmissions", undefined); + assertMetricEqual(metrics, "wasLoadShed", undefined); + assertMetricEqual(metrics, "wasDeprioritized", undefined); } function assertWriteMetricsEqual( diff --git a/src/mongo/db/curop.cpp b/src/mongo/db/curop.cpp index ef1063d8b8b..1e93f4df90e 100644 --- a/src/mongo/db/curop.cpp +++ b/src/mongo/db/curop.cpp @@ -488,21 +488,36 @@ void CurOp::setEndOfOpMetrics(long long nreturned) { metrics.cpuNanos = metrics.cpuNanos.value_or(Nanoseconds(0)) + _debug.cpuTime; if (const auto& admCtx = ExecutionAdmissionContext::get(opCtx()); - admCtx.getDelinquentAcquisitions() > 0 && !opCtx()->inMultiDocumentTransaction() && - !parent()) { - // Note that we don't record delinquency stats around ticketing when in a - // multi-document transaction, since operations within multi-document transactions hold - // tickets for a long time by design and reporting them as delinquent will just create - // noise in the data. + !opCtx()->inMultiDocumentTransaction() && !parent()) { + if (admCtx.getDelinquentAcquisitions() > 0) { - metrics.delinquentAcquisitions = metrics.delinquentAcquisitions.value_or(0) + - static_cast(admCtx.getDelinquentAcquisitions()); - metrics.totalAcquisitionDelinquency = - metrics.totalAcquisitionDelinquency.value_or(Milliseconds(0)) + - Milliseconds(admCtx.getTotalAcquisitionDelinquencyMillis()); - metrics.maxAcquisitionDelinquency = Milliseconds{ - std::max(metrics.maxAcquisitionDelinquency.value_or(Milliseconds(0)).count(), - admCtx.getMaxAcquisitionDelinquencyMillis())}; + // Note that we don't record delinquency stats around ticketing when in a + // multi-document transaction, since operations within multi-document + // transactions hold tickets for a long time by design and reporting them as + // delinquent will just create noise in the data. + + metrics.delinquentAcquisitions = metrics.delinquentAcquisitions.value_or(0) + + static_cast(admCtx.getDelinquentAcquisitions()); + metrics.totalAcquisitionDelinquency = + metrics.totalAcquisitionDelinquency.value_or(Milliseconds(0)) + + Milliseconds(admCtx.getTotalAcquisitionDelinquencyMillis()); + metrics.maxAcquisitionDelinquency = Milliseconds{ + std::max(metrics.maxAcquisitionDelinquency.value_or(Milliseconds(0)).count(), + admCtx.getMaxAcquisitionDelinquencyMillis())}; + } + // Only record execution control metrics for non exempted operations, that is, user + // operations, this is done to prevent having extra noise on the statistics comming from + // internal operations. + if (admCtx.getAdmissions() > 0 && admCtx.getExemptedAdmissions() == 0) { + metrics.totalTimeQueuedMicros = + metrics.totalTimeQueuedMicros.value_or(Microseconds(0)) + + admCtx.totalTimeQueuedMicros(); + metrics.totalAdmissions = metrics.totalAdmissions.value_or(0) + + (admCtx.getAdmissions() - admCtx.getExemptedAdmissions()); + metrics.wasLoadShed = metrics.wasLoadShed.value_or(false) || admCtx.getLoadShed(); + metrics.wasDeprioritized = + metrics.wasDeprioritized.value_or(false) || admCtx.getLoadShed(); + } } if (!parent()) { @@ -1166,6 +1181,12 @@ void CurOp::reportState(BSONObjBuilder* builder, BSONObjBuilder sub(builder->subobjStart("delinquencyInfo")); OpDebug::appendDelinquentInfo(opCtx, sub); } + if (admCtx.getAdmissions() > 0) { + builder->append("totalTimeQueuedMicros", admCtx.totalTimeQueuedMicros().count()); + builder->append("totalAdmissions", admCtx.getAdmissions()); + builder->append("wasLoadShed", admCtx.getLoadShed()); + builder->append("wasDeprioritized", admCtx.getPriorityLowered()); + } } populateCurrentOpQueueStats(opCtx, _tickSource, builder); diff --git a/src/mongo/db/curop_test.cpp b/src/mongo/db/curop_test.cpp index c4af8885114..6e0a455c243 100644 --- a/src/mongo/db/curop_test.cpp +++ b/src/mongo/db/curop_test.cpp @@ -105,6 +105,14 @@ TEST(CurOpTest, AddingAdditiveMetricsObjectsTogetherShouldAddFieldsTogether) { additiveMetricsToAdd.totalAcquisitionDelinquency = Milliseconds{200}; currentAdditiveMetrics.maxAcquisitionDelinquency = Milliseconds{300}; additiveMetricsToAdd.maxAcquisitionDelinquency = Milliseconds{100}; + currentAdditiveMetrics.totalTimeQueuedMicros = Microseconds{300}; + additiveMetricsToAdd.totalTimeQueuedMicros = Microseconds{200}; + currentAdditiveMetrics.totalAdmissions = 1; + additiveMetricsToAdd.totalAdmissions = 2; + currentAdditiveMetrics.wasLoadShed = false; + additiveMetricsToAdd.wasLoadShed = true; + currentAdditiveMetrics.wasDeprioritized = false; + additiveMetricsToAdd.wasDeprioritized = true; currentAdditiveMetrics.numInterruptChecks = 1; additiveMetricsToAdd.numInterruptChecks = 2; currentAdditiveMetrics.overdueInterruptApproxMax = Milliseconds{100}; @@ -154,6 +162,15 @@ TEST(CurOpTest, AddingAdditiveMetricsObjectsTogetherShouldAddFieldsTogether) { ASSERT_EQ(*currentAdditiveMetrics.maxAcquisitionDelinquency, std::max(*additiveMetricsBeforeAdd.maxAcquisitionDelinquency, *additiveMetricsToAdd.maxAcquisitionDelinquency)); + ASSERT_EQ(*currentAdditiveMetrics.totalTimeQueuedMicros, + *additiveMetricsBeforeAdd.totalTimeQueuedMicros + + *additiveMetricsToAdd.totalTimeQueuedMicros); + ASSERT_EQ(*currentAdditiveMetrics.totalAdmissions, + *additiveMetricsBeforeAdd.totalAdmissions + *additiveMetricsToAdd.totalAdmissions); + ASSERT_EQ(*currentAdditiveMetrics.wasLoadShed, + *additiveMetricsBeforeAdd.wasLoadShed || *additiveMetricsToAdd.wasLoadShed); + ASSERT_EQ(*currentAdditiveMetrics.wasDeprioritized, + *additiveMetricsBeforeAdd.wasDeprioritized || *additiveMetricsToAdd.wasDeprioritized); ASSERT_EQ(*currentAdditiveMetrics.numInterruptChecks, *additiveMetricsBeforeAdd.numInterruptChecks + *additiveMetricsToAdd.numInterruptChecks); @@ -181,6 +198,11 @@ TEST(CurOpTest, AddingUninitializedAdditiveMetricsFieldsShouldBeTreatedAsZero) { additiveMetricsToAdd.delinquentAcquisitions = 1; additiveMetricsToAdd.totalAcquisitionDelinquency = Milliseconds(100); additiveMetricsToAdd.maxAcquisitionDelinquency = Milliseconds(100); + additiveMetricsToAdd.totalTimeQueuedMicros = Microseconds(100); + additiveMetricsToAdd.totalAdmissions = 1; + additiveMetricsToAdd.wasLoadShed = true; + additiveMetricsToAdd.wasDeprioritized = true; + additiveMetricsToAdd.maxAcquisitionDelinquency = Milliseconds(100); additiveMetricsToAdd.numInterruptChecks = 1; additiveMetricsToAdd.overdueInterruptApproxMax = Milliseconds(100); @@ -239,6 +261,14 @@ TEST(CurOpTest, AddingUninitializedAdditiveMetricsFieldsShouldBeTreatedAsZero) { ASSERT_EQ(*currentAdditiveMetrics.numInterruptChecks, *additiveMetricsToAdd.numInterruptChecks); ASSERT_EQ(*currentAdditiveMetrics.overdueInterruptApproxMax, *additiveMetricsToAdd.overdueInterruptApproxMax); + + // The execution control fields for the current AdditiveMetrics object were not initialized, so + // they should be treated as zero. + ASSERT_EQ(*currentAdditiveMetrics.totalTimeQueuedMicros, + *additiveMetricsToAdd.totalTimeQueuedMicros); + ASSERT_EQ(*currentAdditiveMetrics.totalAdmissions, *additiveMetricsToAdd.totalAdmissions); + ASSERT_EQ(*currentAdditiveMetrics.wasLoadShed, *additiveMetricsToAdd.wasLoadShed); + ASSERT_EQ(*currentAdditiveMetrics.wasDeprioritized, *additiveMetricsToAdd.wasDeprioritized); } TEST(CurOpTest, AdditiveMetricsFieldsShouldIncrementByN) { @@ -278,6 +308,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateCursorMetrics) { additiveMetrics.delinquentAcquisitions = 2; additiveMetrics.totalAcquisitionDelinquency = Milliseconds(400); additiveMetrics.maxAcquisitionDelinquency = Milliseconds(300); + additiveMetrics.totalTimeQueuedMicros = Microseconds(400); + additiveMetrics.totalAdmissions = 2; + additiveMetrics.wasLoadShed = false; + additiveMetrics.wasDeprioritized = false; additiveMetrics.numInterruptChecks = 2; additiveMetrics.overdueInterruptApproxMax = Milliseconds(100); additiveMetrics.nMatched = 1; @@ -286,7 +320,6 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateCursorMetrics) { additiveMetrics.ndeleted = 0; additiveMetrics.ninserted = 0; - CursorMetrics cursorMetrics(3 /* keysExamined */, 4 /* docsExamined */, 10 /* bytesRead */, @@ -307,6 +340,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateCursorMetrics) { cursorMetrics.setTotalAcquisitionDelinquencyMillis(400); cursorMetrics.setMaxAcquisitionDelinquencyMillis(200); cursorMetrics.setOverdueInterruptApproxMaxMillis(200); + cursorMetrics.setTotalTimeQueuedMicros(400); + cursorMetrics.setTotalAdmissions(5); + cursorMetrics.setWasLoadShed(true); + cursorMetrics.setWasDeprioritized(true); additiveMetrics.aggregateCursorMetrics(cursorMetrics); @@ -328,6 +365,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateCursorMetrics) { ASSERT_EQ(*additiveMetrics.nModified, 2); ASSERT_EQ(*additiveMetrics.ndeleted, 0); ASSERT_EQ(*additiveMetrics.ninserted, 0); + ASSERT_EQ(*additiveMetrics.totalTimeQueuedMicros, Microseconds(800)); + ASSERT_EQ(*additiveMetrics.totalAdmissions, 7); + ASSERT_EQ(*additiveMetrics.wasLoadShed, true); + ASSERT_EQ(*additiveMetrics.wasDeprioritized, true); } TEST(CurOpTest, AdditiveMetricsShouldAggregateNegativeCpuNanos) { @@ -400,6 +441,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateDataBearingNodeMetrics) { additiveMetrics.delinquentAcquisitions = 2; additiveMetrics.totalAcquisitionDelinquency = Milliseconds(400); additiveMetrics.maxAcquisitionDelinquency = Milliseconds(200); + additiveMetrics.totalTimeQueuedMicros = Microseconds(400); + additiveMetrics.totalAdmissions = 3; + additiveMetrics.wasLoadShed = true; + additiveMetrics.wasDeprioritized = true; additiveMetrics.numInterruptChecks = 2; additiveMetrics.overdueInterruptApproxMax = Milliseconds(100); @@ -413,6 +458,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateDataBearingNodeMetrics) { remoteMetrics.delinquentAcquisitions = 1; remoteMetrics.totalAcquisitionDelinquency = Milliseconds(300); remoteMetrics.maxAcquisitionDelinquency = Milliseconds(300); + remoteMetrics.totalTimeQueuedMicros = Microseconds(300); + remoteMetrics.totalAdmissions = 2; + remoteMetrics.wasLoadShed = false; + remoteMetrics.wasDeprioritized = false; remoteMetrics.numInterruptChecks = 1; remoteMetrics.overdueInterruptApproxMax = Milliseconds(300); @@ -427,6 +476,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateDataBearingNodeMetrics) { ASSERT_EQ(*additiveMetrics.delinquentAcquisitions, 3); ASSERT_EQ(*additiveMetrics.totalAcquisitionDelinquency, Milliseconds(700)); ASSERT_EQ(*additiveMetrics.maxAcquisitionDelinquency, Milliseconds(300)); + ASSERT_EQ(*additiveMetrics.totalTimeQueuedMicros, Microseconds(700)); + ASSERT_EQ(*additiveMetrics.totalAdmissions, 5); + ASSERT_EQ(*additiveMetrics.wasLoadShed, true); + ASSERT_EQ(*additiveMetrics.wasDeprioritized, true); ASSERT_EQ(*additiveMetrics.numInterruptChecks, 3); ASSERT_EQ(*additiveMetrics.overdueInterruptApproxMax, Milliseconds(300)); } @@ -696,6 +749,41 @@ TEST(CurOpTest, ReportStateIncludesDelinquentStatsIfNonZero) { } } +TEST(CurOpTest, ReportDeprioritizationStats) { + QueryTestServiceContext serviceContext; + auto opCtx = serviceContext.makeOperationContext(); + auto curOp = CurOp::get(*opCtx); + curOp->ensureStarted(); + + // 1. Check the default state. If they're zero, they're not reported. + { + BSONObjBuilder bob; + curOp->reportState(&bob, SerializationContext{}); + BSONObj state = bob.obj(); + ASSERT_FALSE(state.hasField("totalTimeQueuedMicros")); + ASSERT_FALSE(state.hasField("totalAdmissions")); + ASSERT_FALSE(state.hasField("wasLoadShed")); + ASSERT_FALSE(state.hasField("wasDeprioritized")); + } + + // 2. If there are admissions then report the stats. + { + ExecutionAdmissionContext::get(opCtx.get()).setAdmission_forTest(1); + ExecutionAdmissionContext::get(opCtx.get()).setTotalTimeQueuedMicros_forTest(100); + BSONObjBuilder bob; + curOp->reportState(&bob, SerializationContext{}); + BSONObj state = bob.obj(); + ASSERT_TRUE(state.hasField("totalAdmissions")); + ASSERT_EQ(state["totalAdmissions"].Number(), 1); + ASSERT_TRUE(state.hasField("totalTimeQueuedMicros")); + ASSERT_EQ(state["totalTimeQueuedMicros"].Number(), 100); + ASSERT_TRUE(state.hasField("wasLoadShed")); + ASSERT_FALSE(state["wasLoadShed"].Bool()); + ASSERT_TRUE(state.hasField("wasDeprioritized")); + ASSERT_FALSE(state["wasDeprioritized"].Bool()); + } +} + TEST(CurOpTest, ReportStateIncludesPriorityLowered) { QueryTestServiceContext serviceContext; auto opCtx = serviceContext.makeOperationContext(); diff --git a/src/mongo/db/op_debug.cpp b/src/mongo/db/op_debug.cpp index f33f5e8fd27..0a34692abdc 100644 --- a/src/mongo/db/op_debug.cpp +++ b/src/mongo/db/op_debug.cpp @@ -1335,6 +1335,12 @@ CursorMetrics OpDebug::getCursorMetrics() const { metrics.setMaxAcquisitionDelinquencyMillis( additiveMetrics.maxAcquisitionDelinquency.value_or(Milliseconds(0)).count()); + metrics.setTotalTimeQueuedMicros( + additiveMetrics.totalTimeQueuedMicros.value_or(Microseconds(0)).count()); + metrics.setTotalAdmissions(additiveMetrics.totalAdmissions.value_or(0)); + metrics.setWasLoadShed(additiveMetrics.wasLoadShed.value_or(false)); + metrics.setWasDeprioritized(additiveMetrics.wasDeprioritized.value_or(false)); + metrics.setNumInterruptChecks(additiveMetrics.numInterruptChecks.value_or(0)); metrics.setOverdueInterruptApproxMaxMillis( additiveMetrics.overdueInterruptApproxMax.value_or(Milliseconds(0)).count()); @@ -1481,6 +1487,10 @@ void OpDebug::AdditiveMetrics::add(const AdditiveMetrics& otherMetrics) { maxAcquisitionDelinquency = std::max(maxAcquisitionDelinquency.value_or(Milliseconds(0)), *otherMetrics.maxAcquisitionDelinquency); } + totalTimeQueuedMicros = addOptionals(totalTimeQueuedMicros, otherMetrics.totalTimeQueuedMicros); + totalAdmissions = addOptionals(totalAdmissions, otherMetrics.totalAdmissions); + wasLoadShed = addOptionals(wasLoadShed, otherMetrics.wasLoadShed); + wasDeprioritized = addOptionals(wasDeprioritized, otherMetrics.wasDeprioritized); hasSortStage = hasSortStage || otherMetrics.hasSortStage; usedDisk = usedDisk || otherMetrics.usedDisk; @@ -1514,6 +1524,12 @@ void OpDebug::AdditiveMetrics::aggregateDataBearingNodeMetrics( maxAcquisitionDelinquency = std::max(maxAcquisitionDelinquency.value_or(Milliseconds(0)), metrics.maxAcquisitionDelinquency); + totalTimeQueuedMicros = + totalTimeQueuedMicros.value_or(Microseconds(0)) + metrics.totalTimeQueuedMicros; + totalAdmissions = totalAdmissions.value_or(0) + metrics.totalAdmissions; + wasLoadShed = wasLoadShed.value_or(false) || metrics.wasLoadShed; + wasDeprioritized = wasDeprioritized.value_or(false) || metrics.wasDeprioritized; + numInterruptChecks = numInterruptChecks.value_or(0) + metrics.numInterruptChecks; overdueInterruptApproxMax = std::max(overdueInterruptApproxMax.value_or(Milliseconds(0)), metrics.overdueInterruptApproxMax); @@ -1558,7 +1574,11 @@ void OpDebug::AdditiveMetrics::aggregateCursorMetrics(const CursorMetrics& metri static_cast(metrics.getNUpserted()), static_cast(metrics.getNModified()), static_cast(metrics.getNDeleted()), - static_cast(metrics.getNInserted())}); + static_cast(metrics.getNInserted()), + Microseconds(metrics.getTotalTimeQueuedMicros()), + static_cast(metrics.getTotalAdmissions()), + metrics.getWasLoadShed(), + metrics.getWasDeprioritized()}); } void OpDebug::AdditiveMetrics::aggregateStorageStats(const StorageStats& stats) { diff --git a/src/mongo/db/op_debug.h b/src/mongo/db/op_debug.h index 6f14eff1ad1..5eb87fa8f77 100644 --- a/src/mongo/db/op_debug.h +++ b/src/mongo/db/op_debug.h @@ -230,6 +230,13 @@ public: boost::optional numInterruptChecks; boost::optional overdueInterruptApproxMax; + + // If query stats are being collected for this operation, stores the execution control + // statistics. + boost::optional totalTimeQueuedMicros; + boost::optional totalAdmissions; + boost::optional wasLoadShed; + boost::optional wasDeprioritized; }; MONGO_MOD_PRIVATE OpDebug() = default; diff --git a/src/mongo/db/query/client_cursor/cursor_response.idl b/src/mongo/db/query/client_cursor/cursor_response.idl index 2e6e2bf8a2f..b66d502c6f6 100644 --- a/src/mongo/db/query/client_cursor/cursor_response.idl +++ b/src/mongo/db/query/client_cursor/cursor_response.idl @@ -135,6 +135,26 @@ structs: description: "The number of documents inserted (excluding upserts)." type: long stability: unstable + totalTimeQueuedMicros: + description: "Total time spent waiting for a ticket." + type: long + stability: unstable + default: 0 + totalAdmissions: + description: "Total number of admissions during the operation." + type: long + stability: unstable + default: 0 + wasLoadShed: + description: "If true, the operation was load shed." + type: bool + stability: unstable + default: false + wasDeprioritized: + description: "If true, the operation was deprioritized at some point." + type: bool + stability: unstable + default: false ResponseCursorBase: description: "Common fields of initial and subsequent cursor responses." diff --git a/src/mongo/db/query/client_cursor/cursor_response_test.cpp b/src/mongo/db/query/client_cursor/cursor_response_test.cpp index ce47b116cc6..0f25aa537d7 100644 --- a/src/mongo/db/query/client_cursor/cursor_response_test.cpp +++ b/src/mongo/db/query/client_cursor/cursor_response_test.cpp @@ -63,6 +63,10 @@ static const BSONObj basicMetricsObj = fromjson(R"({ delinquentAcquisitions: {"$numberLong": "0"}, totalAcquisitionDelinquencyMillis: {"$numberLong": "0"}, maxAcquisitionDelinquencyMillis: {"$numberLong": "0"}, + totalTimeQueuedMicros: {"$numberLong": "0"}, + totalAdmissions: {"$numberLong": "0"}, + wasLoadShed: false, + wasDeprioritized: false, numInterruptChecks: {"$numberLong": "0"}, overdueInterruptApproxMaxMillis: {"$numberLong": "0"}, nMatched: {"$numberLong": "0"}, @@ -323,6 +327,10 @@ TEST(CursorResponseTest, parseFromBSONCursorMetrics) { ASSERT_EQ(metrics.getDelinquentAcquisitions(), 0); ASSERT_EQ(metrics.getTotalAcquisitionDelinquencyMillis(), 0); ASSERT_EQ(metrics.getMaxAcquisitionDelinquencyMillis(), 0); + ASSERT_EQ(metrics.getTotalTimeQueuedMicros(), 0); + ASSERT_EQ(metrics.getTotalAdmissions(), 0); + ASSERT_FALSE(metrics.getWasLoadShed()); + ASSERT_FALSE(metrics.getWasDeprioritized()); ASSERT_EQ(metrics.getNumInterruptChecks(), 0); ASSERT_EQ(metrics.getOverdueInterruptApproxMaxMillis(), 0); ASSERT_EQ(metrics.getNMatched(), 0); @@ -991,6 +999,10 @@ TEST_F(CursorResponseBuilderTest, buildResponseWithAllKnownFields) { ASSERT_EQ(parsedMetrics->getDelinquentAcquisitions(), 0); ASSERT_EQ(parsedMetrics->getTotalAcquisitionDelinquencyMillis(), 0); ASSERT_EQ(parsedMetrics->getMaxAcquisitionDelinquencyMillis(), 0); + ASSERT_EQ(parsedMetrics->getTotalTimeQueuedMicros(), 0); + ASSERT_EQ(parsedMetrics->getTotalAdmissions(), 0); + ASSERT_FALSE(parsedMetrics->getWasLoadShed()); + ASSERT_FALSE(parsedMetrics->getWasDeprioritized()); ASSERT_EQ(parsedMetrics->getNumInterruptChecks(), 15); ASSERT_EQ(parsedMetrics->getOverdueInterruptApproxMaxMillis(), 0); ASSERT_EQ(parsedMetrics->getNMatched(), 1); diff --git a/src/mongo/db/query/query_stats/data_bearing_node_metrics.h b/src/mongo/db/query/query_stats/data_bearing_node_metrics.h index 89f455ff57a..4f71d6c36ff 100644 --- a/src/mongo/db/query/query_stats/data_bearing_node_metrics.h +++ b/src/mongo/db/query/query_stats/data_bearing_node_metrics.h @@ -69,6 +69,11 @@ struct MONGO_MOD_PUB DataBearingNodeMetrics { uint64_t nDeleted = 0; uint64_t nInserted = 0; + Microseconds totalTimeQueuedMicros{0}; + uint64_t totalAdmissions = 0; + bool wasLoadShed = false; + bool wasDeprioritized = false; + /** * Adds the fields from the given object into the fields of this object using addition (in the * case of numeric metrics) or conjunction/disjunction (in the case of boolean metrics). @@ -96,6 +101,10 @@ struct MONGO_MOD_PUB DataBearingNodeMetrics { nModified += other.nModified; nDeleted += other.nDeleted; nInserted += other.nInserted; + totalTimeQueuedMicros += other.totalTimeQueuedMicros; + totalAdmissions += other.totalAdmissions; + wasLoadShed = wasLoadShed || other.wasLoadShed; + wasDeprioritized = wasDeprioritized || other.wasDeprioritized; } void add(const boost::optional& other) { @@ -131,6 +140,10 @@ struct MONGO_MOD_PUB DataBearingNodeMetrics { nModified += metrics.getNModified(); nDeleted += metrics.getNDeleted(); nInserted += metrics.getNInserted(); + totalTimeQueuedMicros += Microseconds(metrics.getTotalTimeQueuedMicros()); + totalAdmissions += metrics.getTotalAdmissions(); + wasLoadShed = wasLoadShed || metrics.getWasLoadShed(); + wasDeprioritized = wasDeprioritized || metrics.getWasDeprioritized(); } }; diff --git a/src/mongo/db/query/query_stats/query_stats.cpp b/src/mongo/db/query/query_stats/query_stats.cpp index 710d63ecc33..b049dce0064 100644 --- a/src/mongo/db/query/query_stats/query_stats.cpp +++ b/src/mongo/db/query/query_stats/query_stats.cpp @@ -269,6 +269,11 @@ void updateQueryExecStatistics(QueryExecEntry& queryExecEntryToUpdate, queryExecEntryToUpdate.maxAcquisitionDelinquencyMillis.aggregate( snapshot.maxAcquisitionDelinquencyMillis); + queryExecEntryToUpdate.totalTimeQueuedMicros.aggregate(snapshot.totalTimeQueuedMicros); + queryExecEntryToUpdate.totalAdmissions.aggregate(snapshot.totalAdmissions); + queryExecEntryToUpdate.wasLoadShed.aggregate(snapshot.wasLoadShed); + queryExecEntryToUpdate.wasDeprioritized.aggregate(snapshot.wasDeprioritized); + // Store the number of interrupt checks per second as a rate, this can give us a better sense of // how often interrupts are being checked relative to the total execution time across multiple // query runs. @@ -503,7 +508,10 @@ QueryStatsSnapshot captureMetrics(const OperationContext* opCtx, static_cast(metrics.nModified.value_or(0)), static_cast(metrics.ndeleted.value_or(0)), static_cast(metrics.ninserted.value_or(0)), - }; + metrics.totalTimeQueuedMicros.value_or(Microseconds(0)).count(), + static_cast(metrics.totalAdmissions.value_or(0)), + metrics.wasLoadShed.value_or(false), + metrics.wasDeprioritized.value_or(false)}; return snapshot; } diff --git a/src/mongo/db/query/query_stats/query_stats.h b/src/mongo/db/query/query_stats/query_stats.h index a4c929bb5b9..2920a49dcb9 100644 --- a/src/mongo/db/query/query_stats/query_stats.h +++ b/src/mongo/db/query/query_stats/query_stats.h @@ -253,6 +253,11 @@ struct QueryStatsSnapshot { uint64_t nModified; uint64_t nDeleted; uint64_t nInserted; + + int64_t totalTimeQueuedMicros; + uint64_t totalAdmissions; + bool wasLoadShed; + bool wasDeprioritized; }; /** diff --git a/src/mongo/db/query/query_stats/query_stats_entry.cpp b/src/mongo/db/query/query_stats/query_stats_entry.cpp index 089b58cfffe..b7adf7243b7 100644 --- a/src/mongo/db/query/query_stats/query_stats_entry.cpp +++ b/src/mongo/db/query/query_stats/query_stats_entry.cpp @@ -93,6 +93,11 @@ void QueryExecEntry::toBSON(BSONObjBuilder& queryStatsBuilder, bool buildAsSubse totalAcquisitionDelinquencyMillis.appendTo(*builder, "totalAcquisitionDelinquencyMillis"); maxAcquisitionDelinquencyMillis.appendTo(*builder, "maxAcquisitionDelinquencyMillis"); + totalTimeQueuedMicros.appendTo(*builder, "totalTimeQueuedMicros"); + totalAdmissions.appendTo(*builder, "totalAdmissions"); + wasLoadShed.appendTo(*builder, "wasLoadShed"); + wasDeprioritized.appendTo(*builder, "wasDeprioritized"); + numInterruptChecksPerSec.appendTo(*builder, "numInterruptChecksPerSec"); overdueInterruptApproxMaxMillis.appendTo(*builder, "overdueInterruptApproxMaxMillis"); diff --git a/src/mongo/db/query/query_stats/query_stats_entry.h b/src/mongo/db/query/query_stats/query_stats_entry.h index 8acb41f7b34..619654a158b 100644 --- a/src/mongo/db/query/query_stats/query_stats_entry.h +++ b/src/mongo/db/query/query_stats/query_stats_entry.h @@ -85,6 +85,14 @@ struct QueryExecEntry { AggregatedMetric totalAcquisitionDelinquencyMillis; AggregatedMetric maxAcquisitionDelinquencyMillis; + /** + * Aggregates the execution control stats. + */ + AggregatedMetric totalTimeQueuedMicros; + AggregatedMetric totalAdmissions; + AggregatedBool wasLoadShed; + AggregatedBool wasDeprioritized; + /** * Aggregates the checkForInterrupt stats including getMore requests. */ diff --git a/src/mongo/db/query/query_stats/query_stats_store_test.cpp b/src/mongo/db/query/query_stats/query_stats_store_test.cpp index 3eeaf22c4ce..8acb24a9e9c 100644 --- a/src/mongo/db/query/query_stats/query_stats_store_test.cpp +++ b/src/mongo/db/query/query_stats/query_stats_store_test.cpp @@ -1570,6 +1570,10 @@ void verifyQueryStatsBSON(QueryStatsEntry& qse, const QueryStatsBSONParams& para .append("delinquentAcquisitions", emptyIntMetric) .append("totalAcquisitionDelinquencyMillis", emptyIntMetric) .append("maxAcquisitionDelinquencyMillis", emptyIntMetric) + .append("totalTimeQueuedMicros", emptyIntMetric) + .append("totalAdmissions", emptyIntMetric) + .append("wasLoadShed", boolMetricBson(0, 0)) + .append("wasDeprioritized", boolMetricBson(0, 0)) .append("numInterruptChecksPerSec", emptyIntMetric) .append("overdueInterruptApproxMaxMillis", emptyIntMetric); diff --git a/src/mongo/s/query/exec/async_results_merger_test.cpp b/src/mongo/s/query/exec/async_results_merger_test.cpp index c8331b79c20..50169e0b728 100644 --- a/src/mongo/s/query/exec/async_results_merger_test.cpp +++ b/src/mongo/s/query/exec/async_results_merger_test.cpp @@ -3590,6 +3590,10 @@ TEST_F(AsyncResultsMergerTest, RemoteMetricsAggregatedLocally) { metrics.setDelinquentAcquisitions(3); metrics.setTotalAcquisitionDelinquencyMillis(100); metrics.setMaxAcquisitionDelinquencyMillis(80); + metrics.setTotalTimeQueuedMicros(400); + metrics.setTotalAdmissions(5); + metrics.setWasLoadShed(false); + metrics.setWasDeprioritized(false); metrics.setOverdueInterruptApproxMaxMillis(100); scheduleResponse(id, {fromjson("{_id: 1}")}, std::move(metrics)); } @@ -3616,6 +3620,10 @@ TEST_F(AsyncResultsMergerTest, RemoteMetricsAggregatedLocally) { ASSERT_EQ(remoteMetrics.delinquentAcquisitions, 3); ASSERT_EQ(remoteMetrics.totalAcquisitionDelinquency, Milliseconds(100)); ASSERT_EQ(remoteMetrics.maxAcquisitionDelinquency, Milliseconds(80)); + ASSERT_EQ(remoteMetrics.totalTimeQueuedMicros, Microseconds(400)); + ASSERT_EQ(remoteMetrics.totalAdmissions, 5); + ASSERT_FALSE(remoteMetrics.wasLoadShed); + ASSERT_FALSE(remoteMetrics.wasDeprioritized); ASSERT_EQ(remoteMetrics.numInterruptChecks, 3); ASSERT_EQ(remoteMetrics.overdueInterruptApproxMax, Milliseconds(100)); ASSERT_EQ(remoteMetrics.nMatched, 1); @@ -3646,6 +3654,10 @@ TEST_F(AsyncResultsMergerTest, RemoteMetricsAggregatedLocally) { metrics.setDelinquentAcquisitions(2); metrics.setTotalAcquisitionDelinquencyMillis(150); metrics.setMaxAcquisitionDelinquencyMillis(120); + metrics.setTotalTimeQueuedMicros(200); + metrics.setTotalAdmissions(6); + metrics.setWasLoadShed(true); + metrics.setWasDeprioritized(true); metrics.setOverdueInterruptApproxMaxMillis(200); scheduleResponse(CursorId(0), {fromjson("{_id: 2}")}, std::move(metrics)); } @@ -3670,6 +3682,10 @@ TEST_F(AsyncResultsMergerTest, RemoteMetricsAggregatedLocally) { ASSERT_EQ(remoteMetrics.delinquentAcquisitions, 5); ASSERT_EQ(remoteMetrics.totalAcquisitionDelinquency, Milliseconds(250)); ASSERT_EQ(remoteMetrics.maxAcquisitionDelinquency, Milliseconds(120)); + ASSERT_EQ(remoteMetrics.totalTimeQueuedMicros, Microseconds(600)); + ASSERT_EQ(remoteMetrics.totalAdmissions, 11); + ASSERT_TRUE(remoteMetrics.wasLoadShed); + ASSERT_TRUE(remoteMetrics.wasDeprioritized); ASSERT_EQ(remoteMetrics.numInterruptChecks, 5); ASSERT_EQ(remoteMetrics.overdueInterruptApproxMax, Milliseconds(200)); ASSERT_EQ(remoteMetrics.nMatched, 3); @@ -3693,6 +3709,10 @@ TEST_F(AsyncResultsMergerTest, RemoteMetricsAggregatedLocally) { ASSERT_EQ(remoteMetrics.delinquentAcquisitions, 0); ASSERT_EQ(remoteMetrics.totalAcquisitionDelinquency, Milliseconds(0)); ASSERT_EQ(remoteMetrics.maxAcquisitionDelinquency, Milliseconds(0)); + ASSERT_EQ(remoteMetrics.totalTimeQueuedMicros, Microseconds(0)); + ASSERT_EQ(remoteMetrics.totalAdmissions, 0); + ASSERT_FALSE(remoteMetrics.wasLoadShed); + ASSERT_FALSE(remoteMetrics.wasDeprioritized); ASSERT_EQ(remoteMetrics.numInterruptChecks, 0); ASSERT_EQ(remoteMetrics.overdueInterruptApproxMax, Milliseconds(0)); ASSERT_EQ(remoteMetrics.nMatched, 0);