// mongo/jstests/replsets/libs/dbcheck_utils.js

/**
* Contains helper functions for testing dbCheck.
*/
import {configureFailPoint} from "jstests/libs/fail_point_util.js";
import {FeatureFlagUtil} from "jstests/libs/feature_flag_util.js";
// Default value of the 'dbCheckMaxTotalIndexKeysPerSnapshot' server parameter;
// resetSnapshotSize() restores this value after a test has overridden it.
export const defaultSnapshotSize = 1000;
// Healthlog query matching one info-severity entry per completed dbCheck batch.
const infoBatchQuery = {
    "severity": "info",
    "operation": "dbCheckBatch",
    "data.dbCheckParameters": {$exists: true},
};
// Healthlog query matching batches whose hash differed between primary and secondary.
const inconsistentBatchQuery = {
    "severity": "error",
    "msg": "dbCheck batch inconsistent",
    "data.dbCheckParameters": {$exists: true},
};
// Canned healthlog queries used by dbCheck tests to count specific outcomes.
// NOTE: each 'msg' value must match the server's log message byte-for-byte;
// do not reword these strings.
export const logQueries = {
    // Any error- or warning-severity entry written by a dbCheck run.
    allErrorsOrWarningsQuery: {
        $or: [{"severity": "warning"}, {"severity": "error"}],
        "data.dbCheckParameters": {$exists: true},
    },
    // Extra index key with no corresponding document (reverse lookup failed).
    recordNotFoundQuery: {
        "severity": "error",
        "msg": "found extra index key entry without corresponding document",
        "data.context.indexSpec": {$exists: true},
        "data.dbCheckParameters": {$exists: true},
    },
    // Document exists but one or more of its index keys are missing.
    missingIndexKeysQuery: {
        "severity": "error",
        "msg": "Document has missing index keys",
        "data.context.missingIndexKeys": {$exists: true},
        "data.dbCheckParameters": {$exists: true},
    },
    // Reverse lookup found a document, but its keystring set lacks the expected key.
    recordDoesNotMatchQuery: {
        "severity": "error",
        "msg": "found index key entry with corresponding document/keystring set that does not contain the expected key string",
        "data.context.indexSpec": {$exists: true},
        "data.dbCheckParameters": {$exists: true},
    },
    // Extra-index-keys check abandoned because the collection was dropped mid-run.
    collNotFoundWarningQuery: {
        severity: "warning",
        "msg": "abandoning dbCheck extra index keys check because collection no longer exists",
        "data.dbCheckParameters": {$exists: true},
    },
    // Extra-index-keys check abandoned because the target index was dropped mid-run.
    indexNotFoundWarningQuery: {
        severity: "warning",
        "msg": "abandoning dbCheck extra index keys check because index no longer exists",
        "data.dbCheckParameters": {$exists: true},
    },
    duringInitialSyncQuery: {severity: "warning", "msg": "cannot execute dbcheck due to ongoing initial sync"},
    duringStableRecovery: {severity: "warning", "msg": "cannot execute dbcheck due to ongoing stable recovering"},
    errorQuery: {"severity": "error", "data.dbCheckParameters": {$exists: true}},
    warningQuery: {"severity": "warning", "data.dbCheckParameters": {$exists: true}},
    // Either a completed batch or any dbCheck error.
    infoOrErrorQuery: {
        $or: [
            {
                "severity": "info",
                "operation": "dbCheckBatch",
                "data.dbCheckParameters": {$exists: true},
            },
            {"severity": "error", "data.dbCheckParameters": {$exists: true}},
        ],
    },
    infoBatchQuery: infoBatchQuery,
    inconsistentBatchQuery: inconsistentBatchQuery,
    // All batches, whether consistent (info) or inconsistent (error).
    allBatchesQuery: {
        $or: [infoBatchQuery, inconsistentBatchQuery],
    },
    // The start/stop oplog markers a dbCheck run writes to the healthlog.
    startStopQuery: {
        $or: [
            {"operation": "dbCheckStart", "severity": "info"},
            {"operation": "dbCheckStop", "severity": "info"},
        ],
    },
    writeConcernErrorQuery: {
        severity: "error",
        "msg": "dbCheck failed waiting for writeConcern",
        "data.dbCheckParameters": {$exists: true},
    },
    skipApplyingBatchOnSecondaryQuery: {
        severity: "warning",
        "msg": "skipping applying dbcheck batch because the 'skipApplyingDbCheckBatchOnSecondary' parameter is on",
        "data.dbCheckParameters": {$exists: true},
    },
    secondaryBatchTimeoutReachedQuery: {
        severity: "error",
        "data.dbCheckParameters": {$exists: true},
        "msg": "Secondary ending batch early because it reached dbCheckSecondaryBatchMaxTimeMs limit",
    },
    tooManyDbChecksInQueue: {
        severity: "error",
        "msg": "too many dbcheck runs in queue",
        "data.dbCheckParameters": {$exists: true},
    },
    primarySteppedDown: {
        severity: "warning",
        "msg": "abandoning dbCheck batch due to stepdown",
    },
    missingIdIndex: {
        severity: "error",
        operation: "dbCheckBatch",
        "data.error": "IndexNotFound: dbCheck needs _id index",
    },
    extraIndexKeyAtEndOfSecondary: {
        severity: "error",
        msg: "dbcheck batch inconsistent: at least one index key was found on the secondary but not the primary.",
        "data.dbCheckParameters": {$exists: true},
    },
    checkIdIndexOnClusteredCollectionWarningQuery: {
        severity: "warning",
        "msg": "skipping dbCheck extra index keys check on the _id index because the target collection is a clustered collection that doesn't have an _id index",
        "data.dbCheckParameters": {$exists: true},
        "data.error": "DbCheckAttemptOnClusteredCollectionIdIndex: Clustered collection doesn't have an _id index.",
    },
    indexKeyOrderViolationQuery: {
        severity: "error",
        operation: "dbCheckBatch",
        "data.error": {$regex: /^IndexKeyOrderViolation.*/},
    },
};
// Invoke 'f' on every secondary of 'replSet' that is not an arbiter.
export const forEachNonArbiterSecondary = (replSet, f) => {
    replSet.getSecondaries().forEach((secondary) => {
        const hello = secondary.adminCommand({isMaster: 1});
        if (hello.arbiterOnly) {
            return;
        }
        f(secondary);
    });
};
// Invoke 'f' on the primary first, then on every non-arbiter secondary.
export const forEachNonArbiterNode = (replSet, f) => {
    const primary = replSet.getPrimary();
    f(primary);
    forEachNonArbiterSecondary(replSet, f);
};
// Drop local.system.healthlog on every non-arbiter node and wait for replication.
export const clearHealthLog = (replSet) => {
    forEachNonArbiterNode(replSet, (conn) => {
        const dropped = conn.getDB("local").system.healthlog.drop();
        assert(dropped, `clearing health log failed for node ${conn.host}`);
    });
    replSet.awaitReplication();
};
// Configure every non-arbiter node to write a healthlog entry for every dbCheck batch.
export const logEveryBatch = (replSet) => {
    forEachNonArbiterNode(replSet, (conn) =>
        assert.commandWorked(conn.adminCommand({setParameter: 1, "dbCheckHealthLogEveryNBatches": 1})),
    );
};
// Returns true when no operation named "dbCheck" appears in currentOp on this node.
export const dbCheckCompleted = (db) => {
    const ops = db.getSiblingDB("admin").currentOp().inprog;
    if (ops == null) {
        return true;
    }
    return ops.find((op) => op["desc"] == "dbCheck") === undefined;
};
/**
 * Wait for dbCheck to complete (on both primaries and secondaries).
 *
 * Polls currentOp on 'db' until no "dbCheck" operation remains, waits for replication,
 * and (optionally) waits for every non-arbiter node's healthlog to contain exactly one
 * "dbCheckStop" entry.
 *
 * @param {ReplSetTest} replSet replica set under test.
 * @param {DB} db database the dbCheck was issued against (used for currentOp polling).
 * @param {boolean} waitForHealthLogDbCheckStop also wait for the "dbCheckStop" healthlog entry.
 * @param {number|null} awaitCompletionTimeoutMs timeout for the currentOp poll (null = default).
 */
export const awaitDbCheckCompletion = (
    replSet,
    db,
    waitForHealthLogDbCheckStop = true,
    awaitCompletionTimeoutMs = null,
) => {
    assert.soon(
        () => dbCheckCompleted(db),
        "dbCheck timed out for database: " + db.getName() + " for RS: " + replSet.getURL(),
        awaitCompletionTimeoutMs,
    );
    // Save each node's current security token so it can be restored afterwards.
    const tokens = replSet.nodes.map((node) => node._securityToken);
    try {
        // This function might be called with a security token (to specify a tenant) on a
        // connection. Calling tenant agnostic commands to await replication conflict with this
        // token so temporarily remove it.
        replSet.nodes.forEach((node) => node._setSecurityToken(undefined));
        replSet.awaitSecondaryNodes();
        replSet.awaitReplication();
        if (waitForHealthLogDbCheckStop) {
            forEachNonArbiterNode(replSet, function (node) {
                const healthlog = node.getDB("local").system.healthlog;
                // Exactly one stop marker is expected; tests clear the healthlog beforehand.
                assert.soon(
                    function () {
                        return healthlog.find({"operation": "dbCheckStop"}).itcount() == 1;
                    },
                    "dbCheck command didn't complete for database: " +
                        db.getName() +
                        " for node: " +
                        node.host +
                        ", found health log: " +
                        tojson(healthlog.find().toArray()),
                );
            });
        }
    } finally {
        // Restore the original security tokens even if a wait above failed.
        replSet.nodes.forEach((node, idx) => {
            node._setSecurityToken(tokens[idx]);
        });
    }
};
// Drop the collection, clear the health log, recreate the collection with 'collOpts'
// and insert nDocs documents {a: <i>} (or {a: "<i><docSuffix>"} when docSuffix is set).
export const resetAndInsert = (replSet, db, collName, nDocs, docSuffix = null, collOpts = {}) => {
    db[collName].drop();
    clearHealthLog(replSet);
    assert.commandWorked(db.createCollection(collName, collOpts));
    const makeDoc = docSuffix ? (i) => ({a: i.toString() + docSuffix}) : (i) => ({a: i});
    const docs = Array.from({length: nDocs}, (_, i) => makeDoc(i));
    assert.commandWorked(db[collName].insertMany(docs, {ordered: false}));
    replSet.awaitReplication();
    assert.eq(db.getCollection(collName).find({}).count(), nDocs);
};
// Same as resetAndInsert, but each document carries two identically-valued fields 'a' and 'b'.
export const resetAndInsertTwoFields = (replSet, db, collName, nDocs, docSuffix = null, collOpts = {}) => {
    db[collName].drop();
    clearHealthLog(replSet);
    assert.commandWorked(db.createCollection(collName, collOpts));
    const fieldValue = docSuffix ? (i) => i.toString() + docSuffix : (i) => i;
    const docs = Array.from({length: nDocs}, (_, i) => ({a: fieldValue(i), b: fieldValue(i)}));
    assert.commandWorked(db[collName].insertMany(docs, {ordered: false}));
    replSet.awaitReplication();
    assert.eq(db.getCollection(collName).find({}).count(), nDocs);
};
// Drop/recreate the collection, clear the health log, and insert nDocs documents with
// sequential _id values that all share the identical field value {a: 0.1}.
export const resetAndInsertIdentical = (replSet, db, collName, nDocs, collOpts = {}) => {
    db[collName].drop();
    clearHealthLog(replSet);
    assert.commandWorked(db.createCollection(collName, collOpts));
    const docs = Array.from({length: nDocs}, (_, i) => ({_id: i, a: 0.1}));
    assert.commandWorked(db[collName].insertMany(docs, {ordered: false}));
    replSet.awaitReplication();
    assert.eq(db.getCollection(collName).find({}).count(), nDocs);
};
/**
 * Insert numDocs copies of 'doc' while the 'skipIndexNewRecords' failpoint suppresses
 * non-_id index maintenance, producing documents with missing index keys for testing.
 *
 * One single-field index is created per key of 'doc' before the failpoint is enabled,
 * so the index definitions exist but the new records never get keys inserted.
 *
 * @param {ReplSetTest} replSet replica set under test.
 * @param {string} dbName database to insert into.
 * @param {string} collName collection to insert into (created here with 'collOpts').
 * @param {Object} doc document to insert; each top-level key gets its own index.
 * @param {number} numDocs number of copies of 'doc' to insert.
 * @param {boolean} doPrimary enable the failpoint on the primary.
 * @param {boolean} doSecondary enable the failpoint on the first secondary.
 * @param {Object} collOpts options forwarded to createCollection.
 */
export const insertDocsWithMissingIndexKeys = (
    replSet,
    dbName,
    collName,
    doc,
    numDocs = 1,
    doPrimary = true,
    doSecondary = true,
    collOpts = {},
) => {
    const primaryDb = replSet.getPrimary().getDB(dbName);
    const secondaryDb = replSet.getSecondary().getDB(dbName);
    assert.commandWorked(primaryDb.createCollection(collName, collOpts));
    // Create an index for every key in the document.
    let index = {};
    for (let key in doc) {
        index[key] = 1;
        assert.commandWorked(primaryDb[collName].createIndex(index));
        index = {};
    }
    // Make sure the index builds finish replicating before the failpoints are enabled.
    replSet.awaitReplication();
    // dbCheck requires the _id index to iterate through documents in a batch.
    // {skipIdIndex: false} keeps _id index maintenance active while all other
    // index inserts are skipped.
    let skipIndexNewRecordsExceptIdPrimary;
    let skipIndexNewRecordsExceptIdSecondary;
    if (doPrimary) {
        skipIndexNewRecordsExceptIdPrimary = configureFailPoint(primaryDb, "skipIndexNewRecords", {skipIdIndex: false});
    }
    if (doSecondary) {
        skipIndexNewRecordsExceptIdSecondary = configureFailPoint(secondaryDb, "skipIndexNewRecords", {
            skipIdIndex: false,
        });
    }
    for (let i = 0; i < numDocs; i++) {
        assert.commandWorked(primaryDb[collName].insert(doc));
    }
    // Wait for the (key-less) inserts to replicate before turning the failpoints off.
    replSet.awaitReplication();
    if (doPrimary) {
        skipIndexNewRecordsExceptIdPrimary.off();
    }
    if (doSecondary) {
        skipIndexNewRecordsExceptIdSecondary.off();
    }
    // Verify that index has been replicated to all nodes, including _id index.
    forEachNonArbiterNode(replSet, function (node) {
        assert.eq(Object.keys(doc).length + 1, node.getDB(dbName)[collName].getIndexes().length);
    });
};
// Run dbCheck on 'collName' with the given parameters; optionally block until it finishes.
// Command failures are tolerated only when their code appears in 'allowedErrorCodes'.
export const runDbCheck = (
    replSet,
    db,
    collName,
    parameters = {},
    awaitCompletion = false,
    waitForHealthLogDbCheckStop = true,
    allowedErrorCodes = [],
) => {
    if (!parameters.hasOwnProperty("maxBatchTimeMillis")) {
        // Make this huge because stalls and pauses sometimes break this test.
        parameters["maxBatchTimeMillis"] = 20000;
    }
    const dbCheckCommand = {dbCheck: collName, ...parameters};
    const res = assert.commandWorkedOrFailedWithCode(db.runCommand(dbCheckCommand), allowedErrorCodes);
    if (res.ok && awaitCompletion) {
        awaitDbCheckCompletion(replSet, db, waitForHealthLogDbCheckStop);
    }
};
// Poll 'healthlog' until exactly numExpected documents match 'query', logging progress on
// each mismatch. The failure message (with a full healthlog dump) is built once, up front,
// exactly as in the original argument-evaluation order.
export const checkHealthLog = (healthlog, query, numExpected, timeout = 60 * 1000) => {
    let queryCount;
    const countMatches = function () {
        queryCount = healthlog.find(query).count();
        if (queryCount != numExpected) {
            jsTestLog(
                "health log query returned " +
                    queryCount +
                    " entries, expected " +
                    numExpected +
                    " query: " +
                    tojson(query) +
                    " found: " +
                    tojson(healthlog.find(query).toArray()),
            );
        }
        return queryCount == numExpected;
    };
    const failureMsg =
        "health log query returned " +
        healthlog.find(query).count() +
        " entries, expected " +
        numExpected +
        " query: " +
        tojson(query) +
        " found: " +
        tojson(healthlog.find(query).toArray()) +
        " HealthLog: " +
        tojson(healthlog.find().toArray());
    assert.soon(countMatches, failureMsg, timeout);
};
/**
 * Temporarily restart the secondary as a standalone, inject an inconsistency and
 * restart it back as a secondary.
 *
 * @param {ReplSetTest} replSet replica set under test.
 * @param {string} dbName database on which to run 'cmd' while standalone.
 * @param {Object} cmd command that creates the inconsistency (run outside replication).
 * @param {boolean} withMissingIndexKeys also enable 'skipIndexNewRecords' so the
 *     command's inserts get no non-_id index keys.
 * @param {boolean} noCleanData keep the existing dbpath when starting the standalone
 *     (true preserves the secondary's data, which is the whole point).
 */
export const injectInconsistencyOnSecondary = (
    replSet,
    dbName,
    cmd,
    withMissingIndexKeys = false,
    noCleanData = true,
) => {
    const secondaryConn = replSet.getSecondary();
    const secondaryNodeId = replSet.getNodeId(secondaryConn);
    replSet.stop(secondaryNodeId, {forRestart: true /* preserve dbPath */});
    // Restart on the same dbpath so the standalone sees the secondary's data files.
    const standaloneConn = MongoRunner.runMongod({
        dbpath: secondaryConn.dbpath,
        noCleanData: noCleanData,
    });
    const standaloneDB = standaloneConn.getDB(dbName);
    let failpoint;
    if (withMissingIndexKeys) {
        // Keep _id index maintenance active; skip all other index inserts.
        failpoint = configureFailPoint(standaloneDB, "skipIndexNewRecords", {skipIdIndex: false});
    }
    assert.commandWorked(standaloneDB.runCommand(cmd));
    if (withMissingIndexKeys) {
        failpoint.off();
    }
    // Shut down the secondary and restart it as a member of the replica set.
    MongoRunner.stopMongod(standaloneConn);
    replSet.start(secondaryNodeId, {}, true /*restart*/);
    replSet.awaitNodesAgreeOnPrimaryNoAuth();
};
// Returns a list of all collections in a given database excluding views.
function listCollectionsWithoutViews(database) {
    const failMsg = "'listCollections' command failed";
    // Some tests adds an invalid view, resulting in a failure of the 'listCollections' operation
    // with an 'InvalidViewDefinition' error.
    const res = assert.commandWorkedOrFailedWithCode(
        database.runCommand("listCollections"),
        ErrorCodes.InvalidViewDefinition,
        failMsg,
    );
    if (!res.ok) {
        return [];
    }
    return res.cursor.firstBatch.filter((c) => c.type == "collection");
}
// Returns a list of names of all indexes on the collection; empty on a tolerated failure.
function getIndexNames(db, collName, allowedErrorCodes) {
    const failMsg = "'listIndexes' command failed";
    const res = assert.commandWorkedOrFailedWithCode(db[collName].runCommand("listIndexes"), allowedErrorCodes, failMsg);
    if (!res.ok) {
        return [];
    }
    const specs = new DBCommandCursor(db, res).toArray();
    return specs.map((spec) => spec.name);
}
// List of collection names that are ignored from dbcheck.
// NOTE(review): these look like temporary collections created by other tests in the
// fixture — confirm before adding entries here.
const collNamesIgnoredFromDBCheck = ["operationalLatencyHistogramTest_coll_temp", "top_coll_temp"];
/**
 * Run dbCheck for all collections in the database with given parameters and potentially wait for
 * completion.
 *
 * When the SecondaryIndexChecksInDbCheck feature flag is enabled, each collection is first
 * checked with validateMode "dataConsistencyAndMissingIndexKeysCheck" and then, per index,
 * with validateMode "extraIndexKeysCheck".
 *
 * @param {ReplSetTest} replSet replica set under test.
 * @param {DB} db database whose collections are checked.
 * @param {boolean} awaitCompletion wait for the final dbCheck run to complete.
 * @param {number|null} awaitCompletionTimeoutMs timeout forwarded to awaitDbCheckCompletion.
 * @param {Object} dbCheckParameters extra parameters merged into every dbCheck command.
 */
export const runDbCheckForDatabase = (
    replSet,
    db,
    awaitCompletion = false,
    awaitCompletionTimeoutMs = null,
    dbCheckParameters = {},
) => {
    const secondaryIndexCheckEnabled = checkSecondaryIndexChecksInDbCheckFeatureFlagEnabled(replSet.getPrimary());
    let collDbCheckParameters = dbCheckParameters;
    if (secondaryIndexCheckEnabled) {
        collDbCheckParameters = {
            ...dbCheckParameters,
            validateMode: "dataConsistencyAndMissingIndexKeysCheck",
        };
    }
    // Error codes that are expected when dbCheck races with concurrent DDL in
    // passthrough suites; runs hitting them are silently skipped.
    const allowedErrorCodes = [
        ErrorCodes.NamespaceNotFound /* collection got dropped. */,
        ErrorCodes.CommandNotSupportedOnView /* collection got dropped and a view
                                                got created with the same name. */,
        40619 /* collection is not replicated error. */,
        // Some tests adds an invalid view, resulting in a failure of the 'dbcheck'
        // operation with an 'InvalidViewDefinition' error.
        ErrorCodes.InvalidViewDefinition,
        // Might hit stale shardVersion response from shard config while racing with
        // 'dropCollection' command.
        ErrorCodes.StaleConfig,
        // This code internally retries within dbCheck, but it may still fail if retries are
        // exhausted.
        ErrorCodes.ObjectIsBusy,
        // TODO (SERVER-93173): listIndexes on tests which drop and recreate using different casing
        // might fail due to attempting to create the database with only casing difference.
        ErrorCodes.DatabaseDifferCase,
    ];
    listCollectionsWithoutViews(db)
        .map((c) => c.name)
        .forEach((collName) => {
            // Skip collections known to be incompatible with dbCheck.
            if (collNamesIgnoredFromDBCheck.includes(collName)) {
                jsTestLog(
                    "dbCheck (" +
                        tojson(collDbCheckParameters) +
                        ") is skipped on ns: " +
                        db.getName() +
                        "." +
                        collName +
                        " for RS: " +
                        replSet.getURL(),
                );
                return;
            }
            jsTestLog(
                "dbCheck (" +
                    tojson(collDbCheckParameters) +
                    ") is starting on ns: " +
                    db.getName() +
                    "." +
                    collName +
                    " for RS: " +
                    replSet.getURL(),
            );
            // Data consistency (and, if enabled, missing-index-keys) pass for this collection.
            runDbCheck(
                replSet,
                db,
                collName,
                collDbCheckParameters /* parameters */,
                true /* awaitCompletion */,
                false /* waitForHealthLogDbCheckStop */,
                allowedErrorCodes,
            );
            jsTestLog(
                "dbCheck (" +
                    tojson(collDbCheckParameters) +
                    ") is done on ns: " +
                    db.getName() +
                    "." +
                    collName +
                    " for RS: " +
                    replSet.getURL(),
            );
            if (!secondaryIndexCheckEnabled) {
                return;
            }
            // Extra-index-keys pass: one dbCheck run per index of the collection.
            getIndexNames(db, collName, allowedErrorCodes).forEach((indexName) => {
                let extraIndexDbCheckParameters = {
                    ...dbCheckParameters,
                    validateMode: "extraIndexKeysCheck",
                    secondaryIndex: indexName,
                };
                jsTestLog(
                    "dbCheck (" +
                        tojson(extraIndexDbCheckParameters) +
                        ") is starting on ns: " +
                        db.getName() +
                        "." +
                        collName +
                        " for RS: " +
                        replSet.getURL(),
                );
                runDbCheck(
                    replSet,
                    db,
                    collName,
                    extraIndexDbCheckParameters /* parameters */,
                    true /* awaitCompletion */,
                    false /* waitForHealthLogDbCheckStop */,
                    allowedErrorCodes,
                );
                jsTestLog(
                    "dbCheck (" +
                        tojson(extraIndexDbCheckParameters) +
                        ") is done on ns: " +
                        db.getName() +
                        "." +
                        collName +
                        " for RS: " +
                        replSet.getURL(),
                );
            });
        });
    if (awaitCompletion) {
        awaitDbCheckCompletion(replSet, db, false /*waitForHealthLogDbCheckStop*/, awaitCompletionTimeoutMs);
    }
};
/**
 * Assert no errors/warnings (i.e., found inconsistencies). Tolerate
 * SnapshotTooOld errors, as they can occur if the primary is slow enough processing a
 * batch that the secondary is unable to obtain the timestamp the primary used.
 *
 * @param {Mongo} node connection whose local.system.healthlog is scanned.
 * @param {boolean} assertForErrors include severity "error" entries in the scan.
 * @param {boolean} assertForWarnings include severity "warning" entries in the scan.
 * @param {Array} errorsFound out-parameter: matching entries (up to 20) are appended here.
 */
export const assertForDbCheckErrors = (node, assertForErrors = true, assertForWarnings = false, errorsFound = []) => {
    let severityValues = [];
    if (assertForErrors == true) {
        severityValues.push("error");
    }
    if (assertForWarnings == true) {
        severityValues.push("warning");
    }
    const healthlog = node.getDB("local").system.healthlog;
    // Regex matching strings that start without "SnapshotTooOld" or "ObjectIsBusy".
    const regexString = /^((?!^(SnapshotTooOld|ObjectIsBusy)).)*$/;
    // healthlog is a capped collection, truncation during scan might cause cursor
    // invalidation. Truncated data is most likely from previous tests in the fixture, so we
    // should still be able to catch errors by retrying.
    assert.soon(
        () => {
            try {
                let errs = healthlog.find({"severity": {$in: severityValues}, "data.error": regexString});
                if (errs.hasNext()) {
                    const errMsg = "dbCheck found inconsistency on " + node.host;
                    jsTestLog(errMsg + ". Errors/Warnings: ");
                    let err;
                    // Log at most 20 entries to keep test output bounded.
                    for (let count = 0; errs.hasNext() && count < 20; count++) {
                        err = errs.next();
                        errorsFound.push(err);
                        jsTestLog(tojson(err));
                    }
                    jsTestLog("Full HealthLog: " + tojson(healthlog.find().toArray()));
                    assert(false, errMsg);
                }
                return true;
            } catch (e) {
                // Only retry on cursor invalidation caused by capped-collection truncation;
                // anything else (including the assert above) propagates.
                if (e.code !== ErrorCodes.CappedPositionLost) {
                    throw e;
                }
                jsTestLog(`Retrying on CappedPositionLost error: ${tojson(e)}`);
                return false;
            }
        },
        "healthlog scan could not complete.",
        60000,
    );
    jsTestLog("Checked health log for on " + node.host);
};
// Check for dbcheck errors for all nodes in a replica set and ignoring arbiters.
export const assertForDbCheckErrorsForAllNodes = (rst, assertForErrors = true, assertForWarnings = false) => {
    forEachNonArbiterNode(rst, (node) => {
        assertForDbCheckErrors(node, assertForErrors, assertForWarnings);
    });
};
/**
 * Utility for checking if the featureFlagSecondaryIndexChecksInDbCheck is on.
 */
export function checkSecondaryIndexChecksInDbCheckFeatureFlagEnabled(conn) {
    const flagName = "SecondaryIndexChecksInDbCheck";
    return FeatureFlagUtil.isPresentAndEnabled(conn, flagName);
}
// On debug builds, assert that the program output contains exactly expectedNumSnapshots
// occurrences of log id 7844808 ("Catalog snapshot for reverse lookup check ending").
// No-op on non-debug builds.
export function checkNumSnapshots(debugBuild, expectedNumSnapshots) {
    if (!debugBuild) {
        return;
    }
    const output = rawMongoProgramOutput("Catalog snapshot for reverse lookup check ending");
    const actualNumSnapshots = output.split(/7844808/).length - 1;
    assert.eq(
        actualNumSnapshots,
        expectedNumSnapshots,
        "expected " +
            expectedNumSnapshots +
            " catalog snapshots during reverse lookup, found " +
            actualNumSnapshots,
    );
}
// Set the 'dbCheckMaxTotalIndexKeysPerSnapshot' server parameter on every non-arbiter node.
export function setSnapshotSize(rst, snapshotSize) {
    forEachNonArbiterNode(rst, (conn) =>
        assert.commandWorked(
            conn.adminCommand({"setParameter": 1, "dbCheckMaxTotalIndexKeysPerSnapshot": snapshotSize}),
        ),
    );
}
// Restore 'dbCheckMaxTotalIndexKeysPerSnapshot' to its default on every non-arbiter node.
export function resetSnapshotSize(rst) {
    setSnapshotSize(rst, defaultSnapshotSize);
}
// Verifies that the healthlog contains entries that span the entire range that dbCheck should run
// against.
// TODO SERVER-92609: Currently this only works for extra index keys check. We should standardize
// this for collection check as well.
/**
 * Asserts that consecutive dbCheck batches in 'healthlog' cover the whole key range:
 * each batch starts exactly one past the previous batch's end, the first batch starts at
 * 'start' (MinKey when null), and the last batch ends at MaxKey — or, for a custom 'end',
 * at either 'end' or the last inserted document.
 *
 * @param {DBCollection} healthlog local.system.healthlog of the node to check.
 * @param {number} nDocs number of documents inserted by the test.
 * @param {string} indexName index whose batchStart/batchEnd bounds are inspected.
 * @param {string|null} docSuffix suffix appended to document values by resetAndInsert.
 * @param {*} start custom range start, or null for MinKey.
 * @param {*} end custom range end, or null when the whole collection was checked.
 */
export function assertCompleteCoverage(healthlog, nDocs, indexName, docSuffix, start = null, end = null) {
    // For non-empty docSuffix like 'aaa' for instance, if we insert over 10 docs, the lexicographic
    // sorting order would be '0aaa', '1aaa', '10aaa', instead of increasing numerical order. Skip
    // these checks as we have test coverage without needing to account for these specific cases.
    // BUG FIX: the original guard used '||' — `docSuffix !== null || docSuffix !== ""` is true for
    // every possible value, so coverage checking was silently skipped for ALL runs with
    // nDocs >= 10, including the purely numeric ones this function is meant to verify.
    if (nDocs >= 10 && docSuffix !== null && docSuffix !== "") {
        return;
    }
    // Strip the test-supplied suffix from a batch boundary so boundaries compare numerically.
    const truncateDocSuffix = (batchBoundary, docSuffix) => {
        const index = batchBoundary.indexOf(docSuffix);
        jsTestLog("Index : " + index);
        // Suffix absent (-1) or boundary starts with it (0): return the boundary unchanged.
        if (index < 1) {
            return batchBoundary;
        }
        return batchBoundary.substring(0, batchBoundary.indexOf(docSuffix));
    };
    const query = logQueries.allBatchesQuery;
    const batches = healthlog.find(query).toArray();
    let expectedBatchStart = start === null ? MinKey : start;
    let batchEnd = "";
    for (let batch of batches) {
        let batchStart = batch.data.batchStart[indexName];
        if (docSuffix) {
            batchStart = truncateDocSuffix(batchStart, docSuffix);
        }
        // Verify that the batch start is correct.
        assert.eq(expectedBatchStart, batchStart);
        // Set our next expected batch start to the next value after the end of this batch.
        // NOTE(review): '+ 1' assumes numeric boundaries; for string boundaries this
        // concatenates — confirm against the batch-boundary types the tests produce.
        batchEnd = batch.data.batchEnd[indexName];
        if (docSuffix) {
            batchEnd = truncateDocSuffix(batchEnd, docSuffix);
        }
        expectedBatchStart = batchEnd + 1;
    }
    if (end === null) {
        // User did not issue a custom range, assert that we checked all documents.
        assert.eq(MaxKey, batchEnd);
    } else {
        // User issued a custom end, but we do not know if the documents in the collection actually
        // ended at that range. Verify that we have hit either the end of the collection, or we
        // checked up until the specified range.
        assert(
            batchEnd === nDocs - 1 || batchEnd === end,
            `batch end ${batchEnd} did not equal end of collection ${nDocs - 1} nor end of custom range ${end}`,
        );
    }
}