mirror of https://github.com/mongodb/mongo
SERVER-113749 Add deprioritization stats to queryStats (#44816)
GitOrigin-RevId: 437a47a8c7494c366a6dd5b04d0ae14ccfd1254a
This commit is contained in:
parent
50bac32127
commit
3bb367fcb1
|
|
@ -13,9 +13,10 @@
|
|||
import {configureFailPoint} from "jstests/libs/fail_point_util.js";
|
||||
import {findMatchingLogLine} from "jstests/libs/log.js";
|
||||
import {after, before, describe, it} from "jstests/libs/mochalite.js";
|
||||
import {funWithArgs} from "jstests/libs/parallel_shell_helpers.js";
|
||||
import {ReplSetTest} from "jstests/libs/replsettest.js";
|
||||
import {Thread} from "jstests/libs/parallelTester.js";
|
||||
import {funWithArgs} from "jstests/libs/parallel_shell_helpers.js";
|
||||
import {getQueryExecMetrics, getQueryStats} from "jstests/libs/query/query_stats_utils.js";
|
||||
import {ReplSetTest} from "jstests/libs/replsettest.js";
|
||||
import {
|
||||
getExecutionControlStats,
|
||||
getTotalDeprioritizationCount,
|
||||
|
|
@ -404,6 +405,58 @@ describe("Execution control statistics and observability", function () {
|
|||
);
|
||||
});
|
||||
});
|
||||
describe("deprioritization reporting in query execution metrics", function () {
|
||||
let replTest, mongod, db, coll;
|
||||
const findComment = "deprioritized_find_test";
|
||||
|
||||
before(function () {
|
||||
replTest = new ReplSetTest({
|
||||
nodes: 1,
|
||||
nodeOptions: {
|
||||
setParameter: {
|
||||
// Force the query to yield frequently to better expose the low-priority
|
||||
// behavior.
|
||||
internalQueryExecYieldIterations: 1,
|
||||
executionControlDeprioritizationGate: true,
|
||||
executionControlHeuristicNumAdmissionsDeprioritizeThreshold: 1,
|
||||
logComponentVerbosity: {command: 2},
|
||||
internalQueryStatsRateLimit: -1,
|
||||
},
|
||||
slowms: 0,
|
||||
},
|
||||
});
|
||||
replTest.startSet();
|
||||
replTest.initiate();
|
||||
mongod = replTest.getPrimary();
|
||||
db = mongod.getDB(jsTestName());
|
||||
coll = db.coll;
|
||||
|
||||
insertTestDocuments(coll, 100);
|
||||
});
|
||||
|
||||
after(function () {
|
||||
replTest.stopSet();
|
||||
});
|
||||
|
||||
// TODO SERVER-112150: Once all versions support QueryStatsMetricsSubsections feature flag, simply check metrics["queryExec"].
|
||||
function verifyQueryStatsMetrics(metrics) {
|
||||
let statsObj = metrics.hasOwnProperty("queryExec") ? metrics["queryExec"] : metrics;
|
||||
assert.gte(statsObj.totalTimeQueuedMicros.sum, 0);
|
||||
assert.gte(statsObj.totalAdmissions.sum, 0);
|
||||
assert.gte(statsObj.wasLoadShed.false, 0);
|
||||
assert.gte(statsObj.wasDeprioritized.false, 0);
|
||||
}
|
||||
|
||||
it("should report deprioritization in query execution metrics", function () {
|
||||
runDeprioritizedFind(coll, findComment + "_totalization");
|
||||
const queryStats = getQueryStats(mongod, {collName: coll.getName()});
|
||||
assert(
|
||||
queryStats.length === 1,
|
||||
"Expected to find exactly one query stats entry for '" + coll.getName() + "' " + tojson(queryStats),
|
||||
);
|
||||
verifyQueryStatsMetrics(queryStats[0].metrics);
|
||||
});
|
||||
});
|
||||
|
||||
describe("Delinquent operation statistics", function () {
|
||||
let replTest, mongod, db, coll;
|
||||
|
|
|
|||
|
|
@ -50,6 +50,11 @@ function assertMetricsEqual(
|
|||
assertMetricEqual(metrics, "totalAcquisitionDelinquencyMillis", 0);
|
||||
assertMetricEqual(metrics, "maxAcquisitionDelinquencyMillis", 0);
|
||||
assertMetricEqual(metrics, "overdueInterruptApproxMaxMillis", 0);
|
||||
// The exact value of the following metrics cannot be determined beforehand, but we can assert their existence.
|
||||
assertMetricEqual(metrics, "totalTimeQueuedMicros", undefined);
|
||||
assertMetricEqual(metrics, "totalAdmissions", undefined);
|
||||
assertMetricEqual(metrics, "wasLoadShed", undefined);
|
||||
assertMetricEqual(metrics, "wasDeprioritized", undefined);
|
||||
}
|
||||
|
||||
function assertWriteMetricsEqual(
|
||||
|
|
|
|||
|
|
@ -488,21 +488,36 @@ void CurOp::setEndOfOpMetrics(long long nreturned) {
|
|||
metrics.cpuNanos = metrics.cpuNanos.value_or(Nanoseconds(0)) + _debug.cpuTime;
|
||||
|
||||
if (const auto& admCtx = ExecutionAdmissionContext::get(opCtx());
|
||||
admCtx.getDelinquentAcquisitions() > 0 && !opCtx()->inMultiDocumentTransaction() &&
|
||||
!parent()) {
|
||||
// Note that we don't record delinquency stats around ticketing when in a
|
||||
// multi-document transaction, since operations within multi-document transactions hold
|
||||
// tickets for a long time by design and reporting them as delinquent will just create
|
||||
// noise in the data.
|
||||
!opCtx()->inMultiDocumentTransaction() && !parent()) {
|
||||
if (admCtx.getDelinquentAcquisitions() > 0) {
|
||||
|
||||
metrics.delinquentAcquisitions = metrics.delinquentAcquisitions.value_or(0) +
|
||||
static_cast<uint64_t>(admCtx.getDelinquentAcquisitions());
|
||||
metrics.totalAcquisitionDelinquency =
|
||||
metrics.totalAcquisitionDelinquency.value_or(Milliseconds(0)) +
|
||||
Milliseconds(admCtx.getTotalAcquisitionDelinquencyMillis());
|
||||
metrics.maxAcquisitionDelinquency = Milliseconds{
|
||||
std::max(metrics.maxAcquisitionDelinquency.value_or(Milliseconds(0)).count(),
|
||||
admCtx.getMaxAcquisitionDelinquencyMillis())};
|
||||
// Note that we don't record delinquency stats around ticketing when in a
|
||||
// multi-document transaction, since operations within multi-document
|
||||
// transactions hold tickets for a long time by design and reporting them as
|
||||
// delinquent will just create noise in the data.
|
||||
|
||||
metrics.delinquentAcquisitions = metrics.delinquentAcquisitions.value_or(0) +
|
||||
static_cast<uint64_t>(admCtx.getDelinquentAcquisitions());
|
||||
metrics.totalAcquisitionDelinquency =
|
||||
metrics.totalAcquisitionDelinquency.value_or(Milliseconds(0)) +
|
||||
Milliseconds(admCtx.getTotalAcquisitionDelinquencyMillis());
|
||||
metrics.maxAcquisitionDelinquency = Milliseconds{
|
||||
std::max(metrics.maxAcquisitionDelinquency.value_or(Milliseconds(0)).count(),
|
||||
admCtx.getMaxAcquisitionDelinquencyMillis())};
|
||||
}
|
||||
// Only record execution control metrics for non exempted operations, that is, user
|
||||
// operations, this is done to prevent having extra noise on the statistics comming from
|
||||
// internal operations.
|
||||
if (admCtx.getAdmissions() > 0 && admCtx.getExemptedAdmissions() == 0) {
|
||||
metrics.totalTimeQueuedMicros =
|
||||
metrics.totalTimeQueuedMicros.value_or(Microseconds(0)) +
|
||||
admCtx.totalTimeQueuedMicros();
|
||||
metrics.totalAdmissions = metrics.totalAdmissions.value_or(0) +
|
||||
(admCtx.getAdmissions() - admCtx.getExemptedAdmissions());
|
||||
metrics.wasLoadShed = metrics.wasLoadShed.value_or(false) || admCtx.getLoadShed();
|
||||
metrics.wasDeprioritized =
|
||||
metrics.wasDeprioritized.value_or(false) || admCtx.getLoadShed();
|
||||
}
|
||||
}
|
||||
|
||||
if (!parent()) {
|
||||
|
|
@ -1166,6 +1181,12 @@ void CurOp::reportState(BSONObjBuilder* builder,
|
|||
BSONObjBuilder sub(builder->subobjStart("delinquencyInfo"));
|
||||
OpDebug::appendDelinquentInfo(opCtx, sub);
|
||||
}
|
||||
if (admCtx.getAdmissions() > 0) {
|
||||
builder->append("totalTimeQueuedMicros", admCtx.totalTimeQueuedMicros().count());
|
||||
builder->append("totalAdmissions", admCtx.getAdmissions());
|
||||
builder->append("wasLoadShed", admCtx.getLoadShed());
|
||||
builder->append("wasDeprioritized", admCtx.getPriorityLowered());
|
||||
}
|
||||
}
|
||||
|
||||
populateCurrentOpQueueStats(opCtx, _tickSource, builder);
|
||||
|
|
|
|||
|
|
@ -105,6 +105,14 @@ TEST(CurOpTest, AddingAdditiveMetricsObjectsTogetherShouldAddFieldsTogether) {
|
|||
additiveMetricsToAdd.totalAcquisitionDelinquency = Milliseconds{200};
|
||||
currentAdditiveMetrics.maxAcquisitionDelinquency = Milliseconds{300};
|
||||
additiveMetricsToAdd.maxAcquisitionDelinquency = Milliseconds{100};
|
||||
currentAdditiveMetrics.totalTimeQueuedMicros = Microseconds{300};
|
||||
additiveMetricsToAdd.totalTimeQueuedMicros = Microseconds{200};
|
||||
currentAdditiveMetrics.totalAdmissions = 1;
|
||||
additiveMetricsToAdd.totalAdmissions = 2;
|
||||
currentAdditiveMetrics.wasLoadShed = false;
|
||||
additiveMetricsToAdd.wasLoadShed = true;
|
||||
currentAdditiveMetrics.wasDeprioritized = false;
|
||||
additiveMetricsToAdd.wasDeprioritized = true;
|
||||
currentAdditiveMetrics.numInterruptChecks = 1;
|
||||
additiveMetricsToAdd.numInterruptChecks = 2;
|
||||
currentAdditiveMetrics.overdueInterruptApproxMax = Milliseconds{100};
|
||||
|
|
@ -154,6 +162,15 @@ TEST(CurOpTest, AddingAdditiveMetricsObjectsTogetherShouldAddFieldsTogether) {
|
|||
ASSERT_EQ(*currentAdditiveMetrics.maxAcquisitionDelinquency,
|
||||
std::max(*additiveMetricsBeforeAdd.maxAcquisitionDelinquency,
|
||||
*additiveMetricsToAdd.maxAcquisitionDelinquency));
|
||||
ASSERT_EQ(*currentAdditiveMetrics.totalTimeQueuedMicros,
|
||||
*additiveMetricsBeforeAdd.totalTimeQueuedMicros +
|
||||
*additiveMetricsToAdd.totalTimeQueuedMicros);
|
||||
ASSERT_EQ(*currentAdditiveMetrics.totalAdmissions,
|
||||
*additiveMetricsBeforeAdd.totalAdmissions + *additiveMetricsToAdd.totalAdmissions);
|
||||
ASSERT_EQ(*currentAdditiveMetrics.wasLoadShed,
|
||||
*additiveMetricsBeforeAdd.wasLoadShed || *additiveMetricsToAdd.wasLoadShed);
|
||||
ASSERT_EQ(*currentAdditiveMetrics.wasDeprioritized,
|
||||
*additiveMetricsBeforeAdd.wasDeprioritized || *additiveMetricsToAdd.wasDeprioritized);
|
||||
ASSERT_EQ(*currentAdditiveMetrics.numInterruptChecks,
|
||||
*additiveMetricsBeforeAdd.numInterruptChecks +
|
||||
*additiveMetricsToAdd.numInterruptChecks);
|
||||
|
|
@ -181,6 +198,11 @@ TEST(CurOpTest, AddingUninitializedAdditiveMetricsFieldsShouldBeTreatedAsZero) {
|
|||
additiveMetricsToAdd.delinquentAcquisitions = 1;
|
||||
additiveMetricsToAdd.totalAcquisitionDelinquency = Milliseconds(100);
|
||||
additiveMetricsToAdd.maxAcquisitionDelinquency = Milliseconds(100);
|
||||
additiveMetricsToAdd.totalTimeQueuedMicros = Microseconds(100);
|
||||
additiveMetricsToAdd.totalAdmissions = 1;
|
||||
additiveMetricsToAdd.wasLoadShed = true;
|
||||
additiveMetricsToAdd.wasDeprioritized = true;
|
||||
additiveMetricsToAdd.maxAcquisitionDelinquency = Milliseconds(100);
|
||||
additiveMetricsToAdd.numInterruptChecks = 1;
|
||||
additiveMetricsToAdd.overdueInterruptApproxMax = Milliseconds(100);
|
||||
|
||||
|
|
@ -239,6 +261,14 @@ TEST(CurOpTest, AddingUninitializedAdditiveMetricsFieldsShouldBeTreatedAsZero) {
|
|||
ASSERT_EQ(*currentAdditiveMetrics.numInterruptChecks, *additiveMetricsToAdd.numInterruptChecks);
|
||||
ASSERT_EQ(*currentAdditiveMetrics.overdueInterruptApproxMax,
|
||||
*additiveMetricsToAdd.overdueInterruptApproxMax);
|
||||
|
||||
// The execution control fields for the current AdditiveMetrics object were not initialized, so
|
||||
// they should be treated as zero.
|
||||
ASSERT_EQ(*currentAdditiveMetrics.totalTimeQueuedMicros,
|
||||
*additiveMetricsToAdd.totalTimeQueuedMicros);
|
||||
ASSERT_EQ(*currentAdditiveMetrics.totalAdmissions, *additiveMetricsToAdd.totalAdmissions);
|
||||
ASSERT_EQ(*currentAdditiveMetrics.wasLoadShed, *additiveMetricsToAdd.wasLoadShed);
|
||||
ASSERT_EQ(*currentAdditiveMetrics.wasDeprioritized, *additiveMetricsToAdd.wasDeprioritized);
|
||||
}
|
||||
|
||||
TEST(CurOpTest, AdditiveMetricsFieldsShouldIncrementByN) {
|
||||
|
|
@ -278,6 +308,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateCursorMetrics) {
|
|||
additiveMetrics.delinquentAcquisitions = 2;
|
||||
additiveMetrics.totalAcquisitionDelinquency = Milliseconds(400);
|
||||
additiveMetrics.maxAcquisitionDelinquency = Milliseconds(300);
|
||||
additiveMetrics.totalTimeQueuedMicros = Microseconds(400);
|
||||
additiveMetrics.totalAdmissions = 2;
|
||||
additiveMetrics.wasLoadShed = false;
|
||||
additiveMetrics.wasDeprioritized = false;
|
||||
additiveMetrics.numInterruptChecks = 2;
|
||||
additiveMetrics.overdueInterruptApproxMax = Milliseconds(100);
|
||||
additiveMetrics.nMatched = 1;
|
||||
|
|
@ -286,7 +320,6 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateCursorMetrics) {
|
|||
additiveMetrics.ndeleted = 0;
|
||||
additiveMetrics.ninserted = 0;
|
||||
|
||||
|
||||
CursorMetrics cursorMetrics(3 /* keysExamined */,
|
||||
4 /* docsExamined */,
|
||||
10 /* bytesRead */,
|
||||
|
|
@ -307,6 +340,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateCursorMetrics) {
|
|||
cursorMetrics.setTotalAcquisitionDelinquencyMillis(400);
|
||||
cursorMetrics.setMaxAcquisitionDelinquencyMillis(200);
|
||||
cursorMetrics.setOverdueInterruptApproxMaxMillis(200);
|
||||
cursorMetrics.setTotalTimeQueuedMicros(400);
|
||||
cursorMetrics.setTotalAdmissions(5);
|
||||
cursorMetrics.setWasLoadShed(true);
|
||||
cursorMetrics.setWasDeprioritized(true);
|
||||
|
||||
additiveMetrics.aggregateCursorMetrics(cursorMetrics);
|
||||
|
||||
|
|
@ -328,6 +365,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateCursorMetrics) {
|
|||
ASSERT_EQ(*additiveMetrics.nModified, 2);
|
||||
ASSERT_EQ(*additiveMetrics.ndeleted, 0);
|
||||
ASSERT_EQ(*additiveMetrics.ninserted, 0);
|
||||
ASSERT_EQ(*additiveMetrics.totalTimeQueuedMicros, Microseconds(800));
|
||||
ASSERT_EQ(*additiveMetrics.totalAdmissions, 7);
|
||||
ASSERT_EQ(*additiveMetrics.wasLoadShed, true);
|
||||
ASSERT_EQ(*additiveMetrics.wasDeprioritized, true);
|
||||
}
|
||||
|
||||
TEST(CurOpTest, AdditiveMetricsShouldAggregateNegativeCpuNanos) {
|
||||
|
|
@ -400,6 +441,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateDataBearingNodeMetrics) {
|
|||
additiveMetrics.delinquentAcquisitions = 2;
|
||||
additiveMetrics.totalAcquisitionDelinquency = Milliseconds(400);
|
||||
additiveMetrics.maxAcquisitionDelinquency = Milliseconds(200);
|
||||
additiveMetrics.totalTimeQueuedMicros = Microseconds(400);
|
||||
additiveMetrics.totalAdmissions = 3;
|
||||
additiveMetrics.wasLoadShed = true;
|
||||
additiveMetrics.wasDeprioritized = true;
|
||||
additiveMetrics.numInterruptChecks = 2;
|
||||
additiveMetrics.overdueInterruptApproxMax = Milliseconds(100);
|
||||
|
||||
|
|
@ -413,6 +458,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateDataBearingNodeMetrics) {
|
|||
remoteMetrics.delinquentAcquisitions = 1;
|
||||
remoteMetrics.totalAcquisitionDelinquency = Milliseconds(300);
|
||||
remoteMetrics.maxAcquisitionDelinquency = Milliseconds(300);
|
||||
remoteMetrics.totalTimeQueuedMicros = Microseconds(300);
|
||||
remoteMetrics.totalAdmissions = 2;
|
||||
remoteMetrics.wasLoadShed = false;
|
||||
remoteMetrics.wasDeprioritized = false;
|
||||
remoteMetrics.numInterruptChecks = 1;
|
||||
remoteMetrics.overdueInterruptApproxMax = Milliseconds(300);
|
||||
|
||||
|
|
@ -427,6 +476,10 @@ TEST(CurOpTest, AdditiveMetricsShouldAggregateDataBearingNodeMetrics) {
|
|||
ASSERT_EQ(*additiveMetrics.delinquentAcquisitions, 3);
|
||||
ASSERT_EQ(*additiveMetrics.totalAcquisitionDelinquency, Milliseconds(700));
|
||||
ASSERT_EQ(*additiveMetrics.maxAcquisitionDelinquency, Milliseconds(300));
|
||||
ASSERT_EQ(*additiveMetrics.totalTimeQueuedMicros, Microseconds(700));
|
||||
ASSERT_EQ(*additiveMetrics.totalAdmissions, 5);
|
||||
ASSERT_EQ(*additiveMetrics.wasLoadShed, true);
|
||||
ASSERT_EQ(*additiveMetrics.wasDeprioritized, true);
|
||||
ASSERT_EQ(*additiveMetrics.numInterruptChecks, 3);
|
||||
ASSERT_EQ(*additiveMetrics.overdueInterruptApproxMax, Milliseconds(300));
|
||||
}
|
||||
|
|
@ -696,6 +749,41 @@ TEST(CurOpTest, ReportStateIncludesDelinquentStatsIfNonZero) {
|
|||
}
|
||||
}
|
||||
|
||||
TEST(CurOpTest, ReportDeprioritizationStats) {
|
||||
QueryTestServiceContext serviceContext;
|
||||
auto opCtx = serviceContext.makeOperationContext();
|
||||
auto curOp = CurOp::get(*opCtx);
|
||||
curOp->ensureStarted();
|
||||
|
||||
// 1. Check the default state. If they're zero, they're not reported.
|
||||
{
|
||||
BSONObjBuilder bob;
|
||||
curOp->reportState(&bob, SerializationContext{});
|
||||
BSONObj state = bob.obj();
|
||||
ASSERT_FALSE(state.hasField("totalTimeQueuedMicros"));
|
||||
ASSERT_FALSE(state.hasField("totalAdmissions"));
|
||||
ASSERT_FALSE(state.hasField("wasLoadShed"));
|
||||
ASSERT_FALSE(state.hasField("wasDeprioritized"));
|
||||
}
|
||||
|
||||
// 2. If there are admissions then report the stats.
|
||||
{
|
||||
ExecutionAdmissionContext::get(opCtx.get()).setAdmission_forTest(1);
|
||||
ExecutionAdmissionContext::get(opCtx.get()).setTotalTimeQueuedMicros_forTest(100);
|
||||
BSONObjBuilder bob;
|
||||
curOp->reportState(&bob, SerializationContext{});
|
||||
BSONObj state = bob.obj();
|
||||
ASSERT_TRUE(state.hasField("totalAdmissions"));
|
||||
ASSERT_EQ(state["totalAdmissions"].Number(), 1);
|
||||
ASSERT_TRUE(state.hasField("totalTimeQueuedMicros"));
|
||||
ASSERT_EQ(state["totalTimeQueuedMicros"].Number(), 100);
|
||||
ASSERT_TRUE(state.hasField("wasLoadShed"));
|
||||
ASSERT_FALSE(state["wasLoadShed"].Bool());
|
||||
ASSERT_TRUE(state.hasField("wasDeprioritized"));
|
||||
ASSERT_FALSE(state["wasDeprioritized"].Bool());
|
||||
}
|
||||
}
|
||||
|
||||
TEST(CurOpTest, ReportStateIncludesPriorityLowered) {
|
||||
QueryTestServiceContext serviceContext;
|
||||
auto opCtx = serviceContext.makeOperationContext();
|
||||
|
|
|
|||
|
|
@ -1335,6 +1335,12 @@ CursorMetrics OpDebug::getCursorMetrics() const {
|
|||
metrics.setMaxAcquisitionDelinquencyMillis(
|
||||
additiveMetrics.maxAcquisitionDelinquency.value_or(Milliseconds(0)).count());
|
||||
|
||||
metrics.setTotalTimeQueuedMicros(
|
||||
additiveMetrics.totalTimeQueuedMicros.value_or(Microseconds(0)).count());
|
||||
metrics.setTotalAdmissions(additiveMetrics.totalAdmissions.value_or(0));
|
||||
metrics.setWasLoadShed(additiveMetrics.wasLoadShed.value_or(false));
|
||||
metrics.setWasDeprioritized(additiveMetrics.wasDeprioritized.value_or(false));
|
||||
|
||||
metrics.setNumInterruptChecks(additiveMetrics.numInterruptChecks.value_or(0));
|
||||
metrics.setOverdueInterruptApproxMaxMillis(
|
||||
additiveMetrics.overdueInterruptApproxMax.value_or(Milliseconds(0)).count());
|
||||
|
|
@ -1481,6 +1487,10 @@ void OpDebug::AdditiveMetrics::add(const AdditiveMetrics& otherMetrics) {
|
|||
maxAcquisitionDelinquency = std::max(maxAcquisitionDelinquency.value_or(Milliseconds(0)),
|
||||
*otherMetrics.maxAcquisitionDelinquency);
|
||||
}
|
||||
totalTimeQueuedMicros = addOptionals(totalTimeQueuedMicros, otherMetrics.totalTimeQueuedMicros);
|
||||
totalAdmissions = addOptionals(totalAdmissions, otherMetrics.totalAdmissions);
|
||||
wasLoadShed = addOptionals(wasLoadShed, otherMetrics.wasLoadShed);
|
||||
wasDeprioritized = addOptionals(wasDeprioritized, otherMetrics.wasDeprioritized);
|
||||
|
||||
hasSortStage = hasSortStage || otherMetrics.hasSortStage;
|
||||
usedDisk = usedDisk || otherMetrics.usedDisk;
|
||||
|
|
@ -1514,6 +1524,12 @@ void OpDebug::AdditiveMetrics::aggregateDataBearingNodeMetrics(
|
|||
maxAcquisitionDelinquency = std::max(maxAcquisitionDelinquency.value_or(Milliseconds(0)),
|
||||
metrics.maxAcquisitionDelinquency);
|
||||
|
||||
totalTimeQueuedMicros =
|
||||
totalTimeQueuedMicros.value_or(Microseconds(0)) + metrics.totalTimeQueuedMicros;
|
||||
totalAdmissions = totalAdmissions.value_or(0) + metrics.totalAdmissions;
|
||||
wasLoadShed = wasLoadShed.value_or(false) || metrics.wasLoadShed;
|
||||
wasDeprioritized = wasDeprioritized.value_or(false) || metrics.wasDeprioritized;
|
||||
|
||||
numInterruptChecks = numInterruptChecks.value_or(0) + metrics.numInterruptChecks;
|
||||
overdueInterruptApproxMax = std::max(overdueInterruptApproxMax.value_or(Milliseconds(0)),
|
||||
metrics.overdueInterruptApproxMax);
|
||||
|
|
@ -1558,7 +1574,11 @@ void OpDebug::AdditiveMetrics::aggregateCursorMetrics(const CursorMetrics& metri
|
|||
static_cast<uint64_t>(metrics.getNUpserted()),
|
||||
static_cast<uint64_t>(metrics.getNModified()),
|
||||
static_cast<uint64_t>(metrics.getNDeleted()),
|
||||
static_cast<uint64_t>(metrics.getNInserted())});
|
||||
static_cast<uint64_t>(metrics.getNInserted()),
|
||||
Microseconds(metrics.getTotalTimeQueuedMicros()),
|
||||
static_cast<uint64_t>(metrics.getTotalAdmissions()),
|
||||
metrics.getWasLoadShed(),
|
||||
metrics.getWasDeprioritized()});
|
||||
}
|
||||
|
||||
void OpDebug::AdditiveMetrics::aggregateStorageStats(const StorageStats& stats) {
|
||||
|
|
|
|||
|
|
@ -230,6 +230,13 @@ public:
|
|||
|
||||
boost::optional<uint64_t> numInterruptChecks;
|
||||
boost::optional<Milliseconds> overdueInterruptApproxMax;
|
||||
|
||||
// If query stats are being collected for this operation, stores the execution control
|
||||
// statistics.
|
||||
boost::optional<Microseconds> totalTimeQueuedMicros;
|
||||
boost::optional<uint64_t> totalAdmissions;
|
||||
boost::optional<bool> wasLoadShed;
|
||||
boost::optional<bool> wasDeprioritized;
|
||||
};
|
||||
|
||||
MONGO_MOD_PRIVATE OpDebug() = default;
|
||||
|
|
|
|||
|
|
@ -135,6 +135,26 @@ structs:
|
|||
description: "The number of documents inserted (excluding upserts)."
|
||||
type: long
|
||||
stability: unstable
|
||||
totalTimeQueuedMicros:
|
||||
description: "Total time spent waiting for a ticket."
|
||||
type: long
|
||||
stability: unstable
|
||||
default: 0
|
||||
totalAdmissions:
|
||||
description: "Total number of admissions during the operation."
|
||||
type: long
|
||||
stability: unstable
|
||||
default: 0
|
||||
wasLoadShed:
|
||||
description: "If true, the operation was load shed."
|
||||
type: bool
|
||||
stability: unstable
|
||||
default: false
|
||||
wasDeprioritized:
|
||||
description: "If true, the operation was deprioritized at some point."
|
||||
type: bool
|
||||
stability: unstable
|
||||
default: false
|
||||
|
||||
ResponseCursorBase:
|
||||
description: "Common fields of initial and subsequent cursor responses."
|
||||
|
|
|
|||
|
|
@ -63,6 +63,10 @@ static const BSONObj basicMetricsObj = fromjson(R"({
|
|||
delinquentAcquisitions: {"$numberLong": "0"},
|
||||
totalAcquisitionDelinquencyMillis: {"$numberLong": "0"},
|
||||
maxAcquisitionDelinquencyMillis: {"$numberLong": "0"},
|
||||
totalTimeQueuedMicros: {"$numberLong": "0"},
|
||||
totalAdmissions: {"$numberLong": "0"},
|
||||
wasLoadShed: false,
|
||||
wasDeprioritized: false,
|
||||
numInterruptChecks: {"$numberLong": "0"},
|
||||
overdueInterruptApproxMaxMillis: {"$numberLong": "0"},
|
||||
nMatched: {"$numberLong": "0"},
|
||||
|
|
@ -323,6 +327,10 @@ TEST(CursorResponseTest, parseFromBSONCursorMetrics) {
|
|||
ASSERT_EQ(metrics.getDelinquentAcquisitions(), 0);
|
||||
ASSERT_EQ(metrics.getTotalAcquisitionDelinquencyMillis(), 0);
|
||||
ASSERT_EQ(metrics.getMaxAcquisitionDelinquencyMillis(), 0);
|
||||
ASSERT_EQ(metrics.getTotalTimeQueuedMicros(), 0);
|
||||
ASSERT_EQ(metrics.getTotalAdmissions(), 0);
|
||||
ASSERT_FALSE(metrics.getWasLoadShed());
|
||||
ASSERT_FALSE(metrics.getWasDeprioritized());
|
||||
ASSERT_EQ(metrics.getNumInterruptChecks(), 0);
|
||||
ASSERT_EQ(metrics.getOverdueInterruptApproxMaxMillis(), 0);
|
||||
ASSERT_EQ(metrics.getNMatched(), 0);
|
||||
|
|
@ -991,6 +999,10 @@ TEST_F(CursorResponseBuilderTest, buildResponseWithAllKnownFields) {
|
|||
ASSERT_EQ(parsedMetrics->getDelinquentAcquisitions(), 0);
|
||||
ASSERT_EQ(parsedMetrics->getTotalAcquisitionDelinquencyMillis(), 0);
|
||||
ASSERT_EQ(parsedMetrics->getMaxAcquisitionDelinquencyMillis(), 0);
|
||||
ASSERT_EQ(parsedMetrics->getTotalTimeQueuedMicros(), 0);
|
||||
ASSERT_EQ(parsedMetrics->getTotalAdmissions(), 0);
|
||||
ASSERT_FALSE(parsedMetrics->getWasLoadShed());
|
||||
ASSERT_FALSE(parsedMetrics->getWasDeprioritized());
|
||||
ASSERT_EQ(parsedMetrics->getNumInterruptChecks(), 15);
|
||||
ASSERT_EQ(parsedMetrics->getOverdueInterruptApproxMaxMillis(), 0);
|
||||
ASSERT_EQ(parsedMetrics->getNMatched(), 1);
|
||||
|
|
|
|||
|
|
@ -69,6 +69,11 @@ struct MONGO_MOD_PUB DataBearingNodeMetrics {
|
|||
uint64_t nDeleted = 0;
|
||||
uint64_t nInserted = 0;
|
||||
|
||||
Microseconds totalTimeQueuedMicros{0};
|
||||
uint64_t totalAdmissions = 0;
|
||||
bool wasLoadShed = false;
|
||||
bool wasDeprioritized = false;
|
||||
|
||||
/**
|
||||
* Adds the fields from the given object into the fields of this object using addition (in the
|
||||
* case of numeric metrics) or conjunction/disjunction (in the case of boolean metrics).
|
||||
|
|
@ -96,6 +101,10 @@ struct MONGO_MOD_PUB DataBearingNodeMetrics {
|
|||
nModified += other.nModified;
|
||||
nDeleted += other.nDeleted;
|
||||
nInserted += other.nInserted;
|
||||
totalTimeQueuedMicros += other.totalTimeQueuedMicros;
|
||||
totalAdmissions += other.totalAdmissions;
|
||||
wasLoadShed = wasLoadShed || other.wasLoadShed;
|
||||
wasDeprioritized = wasDeprioritized || other.wasDeprioritized;
|
||||
}
|
||||
|
||||
void add(const boost::optional<DataBearingNodeMetrics>& other) {
|
||||
|
|
@ -131,6 +140,10 @@ struct MONGO_MOD_PUB DataBearingNodeMetrics {
|
|||
nModified += metrics.getNModified();
|
||||
nDeleted += metrics.getNDeleted();
|
||||
nInserted += metrics.getNInserted();
|
||||
totalTimeQueuedMicros += Microseconds(metrics.getTotalTimeQueuedMicros());
|
||||
totalAdmissions += metrics.getTotalAdmissions();
|
||||
wasLoadShed = wasLoadShed || metrics.getWasLoadShed();
|
||||
wasDeprioritized = wasDeprioritized || metrics.getWasDeprioritized();
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -269,6 +269,11 @@ void updateQueryExecStatistics(QueryExecEntry& queryExecEntryToUpdate,
|
|||
queryExecEntryToUpdate.maxAcquisitionDelinquencyMillis.aggregate(
|
||||
snapshot.maxAcquisitionDelinquencyMillis);
|
||||
|
||||
queryExecEntryToUpdate.totalTimeQueuedMicros.aggregate(snapshot.totalTimeQueuedMicros);
|
||||
queryExecEntryToUpdate.totalAdmissions.aggregate(snapshot.totalAdmissions);
|
||||
queryExecEntryToUpdate.wasLoadShed.aggregate(snapshot.wasLoadShed);
|
||||
queryExecEntryToUpdate.wasDeprioritized.aggregate(snapshot.wasDeprioritized);
|
||||
|
||||
// Store the number of interrupt checks per second as a rate, this can give us a better sense of
|
||||
// how often interrupts are being checked relative to the total execution time across multiple
|
||||
// query runs.
|
||||
|
|
@ -503,7 +508,10 @@ QueryStatsSnapshot captureMetrics(const OperationContext* opCtx,
|
|||
static_cast<uint64_t>(metrics.nModified.value_or(0)),
|
||||
static_cast<uint64_t>(metrics.ndeleted.value_or(0)),
|
||||
static_cast<uint64_t>(metrics.ninserted.value_or(0)),
|
||||
};
|
||||
metrics.totalTimeQueuedMicros.value_or(Microseconds(0)).count(),
|
||||
static_cast<uint64_t>(metrics.totalAdmissions.value_or(0)),
|
||||
metrics.wasLoadShed.value_or(false),
|
||||
metrics.wasDeprioritized.value_or(false)};
|
||||
|
||||
return snapshot;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -253,6 +253,11 @@ struct QueryStatsSnapshot {
|
|||
uint64_t nModified;
|
||||
uint64_t nDeleted;
|
||||
uint64_t nInserted;
|
||||
|
||||
int64_t totalTimeQueuedMicros;
|
||||
uint64_t totalAdmissions;
|
||||
bool wasLoadShed;
|
||||
bool wasDeprioritized;
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -93,6 +93,11 @@ void QueryExecEntry::toBSON(BSONObjBuilder& queryStatsBuilder, bool buildAsSubse
|
|||
totalAcquisitionDelinquencyMillis.appendTo(*builder, "totalAcquisitionDelinquencyMillis");
|
||||
maxAcquisitionDelinquencyMillis.appendTo(*builder, "maxAcquisitionDelinquencyMillis");
|
||||
|
||||
totalTimeQueuedMicros.appendTo(*builder, "totalTimeQueuedMicros");
|
||||
totalAdmissions.appendTo(*builder, "totalAdmissions");
|
||||
wasLoadShed.appendTo(*builder, "wasLoadShed");
|
||||
wasDeprioritized.appendTo(*builder, "wasDeprioritized");
|
||||
|
||||
numInterruptChecksPerSec.appendTo(*builder, "numInterruptChecksPerSec");
|
||||
overdueInterruptApproxMaxMillis.appendTo(*builder, "overdueInterruptApproxMaxMillis");
|
||||
|
||||
|
|
|
|||
|
|
@ -85,6 +85,14 @@ struct QueryExecEntry {
|
|||
AggregatedMetric<int64_t> totalAcquisitionDelinquencyMillis;
|
||||
AggregatedMetric<int64_t> maxAcquisitionDelinquencyMillis;
|
||||
|
||||
/**
|
||||
* Aggregates the execution control stats.
|
||||
*/
|
||||
AggregatedMetric<int64_t> totalTimeQueuedMicros;
|
||||
AggregatedMetric<uint64_t> totalAdmissions;
|
||||
AggregatedBool wasLoadShed;
|
||||
AggregatedBool wasDeprioritized;
|
||||
|
||||
/**
|
||||
* Aggregates the checkForInterrupt stats including getMore requests.
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -1570,6 +1570,10 @@ void verifyQueryStatsBSON(QueryStatsEntry& qse, const QueryStatsBSONParams& para
|
|||
.append("delinquentAcquisitions", emptyIntMetric)
|
||||
.append("totalAcquisitionDelinquencyMillis", emptyIntMetric)
|
||||
.append("maxAcquisitionDelinquencyMillis", emptyIntMetric)
|
||||
.append("totalTimeQueuedMicros", emptyIntMetric)
|
||||
.append("totalAdmissions", emptyIntMetric)
|
||||
.append("wasLoadShed", boolMetricBson(0, 0))
|
||||
.append("wasDeprioritized", boolMetricBson(0, 0))
|
||||
.append("numInterruptChecksPerSec", emptyIntMetric)
|
||||
.append("overdueInterruptApproxMaxMillis", emptyIntMetric);
|
||||
|
||||
|
|
|
|||
|
|
@ -3590,6 +3590,10 @@ TEST_F(AsyncResultsMergerTest, RemoteMetricsAggregatedLocally) {
|
|||
metrics.setDelinquentAcquisitions(3);
|
||||
metrics.setTotalAcquisitionDelinquencyMillis(100);
|
||||
metrics.setMaxAcquisitionDelinquencyMillis(80);
|
||||
metrics.setTotalTimeQueuedMicros(400);
|
||||
metrics.setTotalAdmissions(5);
|
||||
metrics.setWasLoadShed(false);
|
||||
metrics.setWasDeprioritized(false);
|
||||
metrics.setOverdueInterruptApproxMaxMillis(100);
|
||||
scheduleResponse(id, {fromjson("{_id: 1}")}, std::move(metrics));
|
||||
}
|
||||
|
|
@ -3616,6 +3620,10 @@ TEST_F(AsyncResultsMergerTest, RemoteMetricsAggregatedLocally) {
|
|||
ASSERT_EQ(remoteMetrics.delinquentAcquisitions, 3);
|
||||
ASSERT_EQ(remoteMetrics.totalAcquisitionDelinquency, Milliseconds(100));
|
||||
ASSERT_EQ(remoteMetrics.maxAcquisitionDelinquency, Milliseconds(80));
|
||||
ASSERT_EQ(remoteMetrics.totalTimeQueuedMicros, Microseconds(400));
|
||||
ASSERT_EQ(remoteMetrics.totalAdmissions, 5);
|
||||
ASSERT_FALSE(remoteMetrics.wasLoadShed);
|
||||
ASSERT_FALSE(remoteMetrics.wasDeprioritized);
|
||||
ASSERT_EQ(remoteMetrics.numInterruptChecks, 3);
|
||||
ASSERT_EQ(remoteMetrics.overdueInterruptApproxMax, Milliseconds(100));
|
||||
ASSERT_EQ(remoteMetrics.nMatched, 1);
|
||||
|
|
@ -3646,6 +3654,10 @@ TEST_F(AsyncResultsMergerTest, RemoteMetricsAggregatedLocally) {
|
|||
metrics.setDelinquentAcquisitions(2);
|
||||
metrics.setTotalAcquisitionDelinquencyMillis(150);
|
||||
metrics.setMaxAcquisitionDelinquencyMillis(120);
|
||||
metrics.setTotalTimeQueuedMicros(200);
|
||||
metrics.setTotalAdmissions(6);
|
||||
metrics.setWasLoadShed(true);
|
||||
metrics.setWasDeprioritized(true);
|
||||
metrics.setOverdueInterruptApproxMaxMillis(200);
|
||||
scheduleResponse(CursorId(0), {fromjson("{_id: 2}")}, std::move(metrics));
|
||||
}
|
||||
|
|
@ -3670,6 +3682,10 @@ TEST_F(AsyncResultsMergerTest, RemoteMetricsAggregatedLocally) {
|
|||
ASSERT_EQ(remoteMetrics.delinquentAcquisitions, 5);
|
||||
ASSERT_EQ(remoteMetrics.totalAcquisitionDelinquency, Milliseconds(250));
|
||||
ASSERT_EQ(remoteMetrics.maxAcquisitionDelinquency, Milliseconds(120));
|
||||
ASSERT_EQ(remoteMetrics.totalTimeQueuedMicros, Microseconds(600));
|
||||
ASSERT_EQ(remoteMetrics.totalAdmissions, 11);
|
||||
ASSERT_TRUE(remoteMetrics.wasLoadShed);
|
||||
ASSERT_TRUE(remoteMetrics.wasDeprioritized);
|
||||
ASSERT_EQ(remoteMetrics.numInterruptChecks, 5);
|
||||
ASSERT_EQ(remoteMetrics.overdueInterruptApproxMax, Milliseconds(200));
|
||||
ASSERT_EQ(remoteMetrics.nMatched, 3);
|
||||
|
|
@ -3693,6 +3709,10 @@ TEST_F(AsyncResultsMergerTest, RemoteMetricsAggregatedLocally) {
|
|||
ASSERT_EQ(remoteMetrics.delinquentAcquisitions, 0);
|
||||
ASSERT_EQ(remoteMetrics.totalAcquisitionDelinquency, Milliseconds(0));
|
||||
ASSERT_EQ(remoteMetrics.maxAcquisitionDelinquency, Milliseconds(0));
|
||||
ASSERT_EQ(remoteMetrics.totalTimeQueuedMicros, Microseconds(0));
|
||||
ASSERT_EQ(remoteMetrics.totalAdmissions, 0);
|
||||
ASSERT_FALSE(remoteMetrics.wasLoadShed);
|
||||
ASSERT_FALSE(remoteMetrics.wasDeprioritized);
|
||||
ASSERT_EQ(remoteMetrics.numInterruptChecks, 0);
|
||||
ASSERT_EQ(remoteMetrics.overdueInterruptApproxMax, Milliseconds(0));
|
||||
ASSERT_EQ(remoteMetrics.nMatched, 0);
|
||||
|
|
|
|||
Loading…
Reference in New Issue