mongo/jstests/replsets/dbcheck.js

540 lines
19 KiB
JavaScript

/**
* Test the dbCheck command.
*
* @tags: [
* # We need persistence as we temporarily restart nodes as standalones.
* requires_persistence,
* assumes_against_mongod_not_mongos,
* ]
*/
(function() {
"use strict";
// This test injects inconsistencies between replica set members; do not fail because of expected
// dbHash differences.
TestData.skipCheckDBHashes = true;
let replSet = new ReplSetTest({name: "dbCheckSet", nodes: 2});
replSet.startSet();
replSet.initiate();
replSet.awaitSecondaryNodes();
function forEachSecondary(f) {
for (let secondary of replSet.getSecondaries()) {
f(secondary);
}
}
function forEachNode(f) {
f(replSet.getPrimary());
forEachSecondary(f);
}
let dbName = "dbCheck-test";
let collName = "dbcheck-collection";
// Clear local.system.healthlog.
function clearLog() {
forEachNode(conn => conn.getDB("local").system.healthlog.drop());
}
function addEnoughForMultipleBatches(collection) {
collection.insertMany([...Array(10000).keys()].map(x => ({_id: x})));
}
// Name for a collection which takes multiple batches to check and which shouldn't be modified
// by any of the tests.
const multiBatchSimpleCollName = "dbcheck-simple-collection";
const multiBatchSimpleCollSize = 10000;
replSet.getPrimary().getDB(dbName)[multiBatchSimpleCollName].insertMany(
[...Array(10000).keys()].map(x => ({_id: x})));
function dbCheckCompleted(db) {
return db.currentOp().inprog.filter(x => x["desc"] == "dbCheck")[0] === undefined;
}
// Wait for DeferredWriter writes to local.system.healthlog to eventually complete.
// Requires clearLog() before the test case is run.
// TODO SERVER-61765 remove this function altoghether when healthlogging becomes
// synchronous.
function dbCheckHealthLogCompleted(db, coll, maxKey, maxSize, maxCount) {
let query = {"namespace": coll.getFullName(), "operation": "dbCheckBatch"};
if (maxSize === undefined && maxCount === undefined && maxKey === undefined) {
query['data.maxKey'] = {"$type": "maxKey"};
}
if (maxCount !== undefined) {
query['data.count'] = maxCount;
} else {
if (maxSize !== undefined) {
query['data.bytes'] = maxSize;
} else {
if (maxKey !== undefined) {
query['data.maxKey'] = maxKey;
}
}
}
return db.getSiblingDB("local").system.healthlog.find(query).itcount() === 1;
}
// Wait for dbCheck to complete (on both primaries and secondaries). Fails an assertion if
// dbCheck takes longer than maxMs.
function awaitDbCheckCompletion(db, collName, maxKey, maxSize, maxCount) {
let start = Date.now();
assert.soon(() => dbCheckCompleted(db), "dbCheck timed out");
replSet.awaitSecondaryNodes();
replSet.awaitReplication();
forEachNode(function(node) {
const healthlog = node.getDB('local').system.healthlog;
assert.soon(function() {
return (healthlog.find({"operation": "dbCheckStop"}).itcount() == 1);
}, "dbCheck command didn't complete");
});
}
// Check that everything in the health log shows a successful and complete check with no found
// inconsistencies.
function checkLogAllConsistent(conn) {
let healthlog = conn.getDB("local").system.healthlog;
const debugBuild = conn.getDB('admin').adminCommand('buildInfo').debug;
if (debugBuild) {
// These tests only run on debug builds because they rely on dbCheck health-logging
// all info-level batch results.
assert(healthlog.find().count(), "dbCheck put no batches in health log");
let maxResult = healthlog.aggregate([
{$match: {operation: "dbCheckBatch"}},
{$group: {_id: 1, key: {$max: "$data.maxKey"}}}
]);
assert(maxResult.hasNext(), "dbCheck put no batches in health log");
assert.eq(maxResult.next().key, {"$maxKey": 1}, "dbCheck batches should end at MaxKey");
let minResult = healthlog.aggregate([
{$match: {operation: "dbCheckBatch"}},
{$group: {_id: 1, key: {$min: "$data.minKey"}}}
]);
assert(minResult.hasNext(), "dbCheck put no batches in health log");
assert.eq(minResult.next().key, {"$minKey": 1}, "dbCheck batches should start at MinKey");
}
// Assert no errors (i.e., found inconsistencies).
let errs = healthlog.find({"severity": {"$ne": "info"}});
if (errs.hasNext()) {
assert(false, "dbCheck found inconsistency: " + tojson(errs.next()));
}
// Assert no failures (i.e., checks that failed to complete).
let failedChecks = healthlog.find({"operation": "dbCheckBatch", "data.success": false});
if (failedChecks.hasNext()) {
assert(false, "dbCheck batch failed: " + tojson(failedChecks.next()));
}
if (debugBuild) {
// These tests only run on debug builds because they rely on dbCheck health-logging
// all info-level batch results.
// Finds an entry with data.minKey === MinKey, and then matches its maxKey against
// another document's minKey, and so on, and then checks that the result of that search
// has data.maxKey === MaxKey.
let completeCoverage = healthlog.aggregate([
{$match: {"operation": "dbCheckBatch", "data.minKey": MinKey}},
{
$graphLookup: {
from: "system.healthlog",
startWith: "$data.minKey",
connectToField: "data.minKey",
connectFromField: "data.maxKey",
as: "batchLimits",
restrictSearchWithMatch: {"operation": "dbCheckBatch"}
}
},
{$match: {"batchLimits.data.maxKey": MaxKey}}
]);
assert(completeCoverage.hasNext(), "dbCheck batches do not cover full key range");
}
}
// Check that the total of all batches in the health log on `conn` is equal to the total number
// of documents and bytes in `coll`.
// Returns a document with fields "totalDocs" and "totalBytes", representing the total size of
// the batches in the health log.
function healthLogCounts(healthlog) {
// These tests only run on debug builds because they rely on dbCheck health-logging
// all info-level batch results.
const debugBuild = healthlog.getDB().getSiblingDB('admin').adminCommand('buildInfo').debug;
if (!debugBuild) {
return;
}
let result = healthlog.aggregate([
{$match: {"operation": "dbCheckBatch"}},
{
$group: {
"_id": null,
"totalDocs": {$sum: "$data.count"},
"totalBytes": {$sum: "$data.bytes"}
}
}
]);
assert(result.hasNext(), "dbCheck put no batches in health log");
return result.next();
}
function checkTotalCounts(conn, coll) {
// These tests only run on debug builds because they rely on dbCheck health-logging
// all info-level batch results.
const debugBuild = conn.getDB('admin').adminCommand('buildInfo').debug;
if (!debugBuild) {
return;
}
let result = healthLogCounts(conn.getDB("local").system.healthlog);
assert.eq(result.totalDocs, coll.count(), "dbCheck batches do not count all documents");
// Calculate the size on the client side, because collection.dataSize is not necessarily the
// sum of the document sizes.
let size = coll.find().toArray().reduce((x, y) => x + bsonsize(y), 0);
assert.eq(result.totalBytes, size, "dbCheck batches do not count all bytes");
}
// First check behavior when everything is consistent.
function simpleTestConsistent() {
let primary = replSet.getPrimary();
clearLog();
assert.neq(primary, undefined);
let db = primary.getDB(dbName);
assert.commandWorked(db.runCommand({"dbCheck": multiBatchSimpleCollName}));
awaitDbCheckCompletion(db, multiBatchSimpleCollName);
checkLogAllConsistent(primary);
checkTotalCounts(primary, db[multiBatchSimpleCollName]);
forEachSecondary(function(secondary) {
checkLogAllConsistent(secondary);
checkTotalCounts(secondary, secondary.getDB(dbName)[multiBatchSimpleCollName]);
});
}
function simpleTestNonSnapshot() {
let primary = replSet.getPrimary();
clearLog();
assert.neq(primary, undefined);
let db = primary.getDB(dbName);
assert.commandWorked(db.runCommand({"dbCheck": multiBatchSimpleCollName, snapshotRead: false}));
awaitDbCheckCompletion(db, multiBatchSimpleCollName);
checkLogAllConsistent(primary);
checkTotalCounts(primary, db[multiBatchSimpleCollName]);
forEachSecondary(function(secondary) {
checkLogAllConsistent(secondary);
checkTotalCounts(secondary, secondary.getDB(dbName)[multiBatchSimpleCollName]);
});
}
// Same thing, but now with concurrent updates.
function concurrentTestConsistent() {
let primary = replSet.getPrimary();
clearLog();
let db = primary.getDB(dbName);
// Add enough documents that dbCheck will take a few seconds.
db[collName].insertMany([...Array(10000).keys()].map(x => ({i: x})));
assert.commandWorked(db.runCommand({"dbCheck": collName}));
let coll = db[collName];
while (db.currentOp().inprog.filter(x => x["desc"] === "dbCheck").length) {
coll.updateOne({}, {"$inc": {"i": 10}});
coll.insertOne({"i": 42});
coll.deleteOne({});
}
awaitDbCheckCompletion(db, collName);
checkLogAllConsistent(primary);
// Omit check for total counts, which might have changed with concurrent updates.
forEachSecondary(secondary => checkLogAllConsistent(secondary, true));
}
simpleTestConsistent();
simpleTestNonSnapshot();
concurrentTestConsistent();
// Test the various other parameters.
function testDbCheckParameters() {
let primary = replSet.getPrimary();
let db = primary.getDB(dbName);
// Clean up for the test.
clearLog();
let docSize = bsonsize({_id: 10});
function checkEntryBounds(start, end) {
forEachNode(function(node) {
// These tests only run on debug builds because they rely on dbCheck health-logging
// all info-level batch results.
const debugBuild = node.getDB('admin').adminCommand('buildInfo').debug;
if (!debugBuild) {
return;
}
let healthlog = node.getDB("local").system.healthlog;
let keyBoundsResult = healthlog.aggregate([
{$match: {operation: "dbCheckBatch"}},
{
$group:
{_id: null, minKey: {$min: "$data.minKey"}, maxKey: {$max: "$data.maxKey"}}
}
]);
assert(keyBoundsResult.hasNext(), "dbCheck put no batches in health log");
const bounds = keyBoundsResult.next();
const counts = healthLogCounts(healthlog);
assert.eq(bounds.minKey, start, "dbCheck minKey field incorrect");
// dbCheck evaluates some exit conditions like maxCount and maxBytes at batch boundary.
// The batch boundary isn't generally deterministic (e.g. can be time-dependent per
// maxBatchTimeMillis) hence the greater-than-or-equal comparisons.
assert.gte(bounds.maxKey, end, "dbCheck maxKey field incorrect");
assert.gte(counts.totalDocs, end - start);
assert.gte(counts.totalBytes, (end - start) * docSize);
});
}
// Run a dbCheck on just a subset of the documents
let start = 1000;
let end = 9000;
assert.commandWorked(
db.runCommand({dbCheck: multiBatchSimpleCollName, minKey: start, maxKey: end}));
awaitDbCheckCompletion(db, multiBatchSimpleCollName, end);
checkEntryBounds(start, end);
// Now, clear the health logs again,
clearLog();
let maxCount = 5000;
// and do the same with a count constraint.
assert.commandWorked(db.runCommand(
{dbCheck: multiBatchSimpleCollName, minKey: start, maxKey: end, maxCount: maxCount}));
// We expect it to reach the count limit before reaching maxKey.
awaitDbCheckCompletion(db, multiBatchSimpleCollName, undefined, undefined, maxCount);
checkEntryBounds(start, start + maxCount);
// Finally, do the same with a size constraint.
clearLog();
let maxSize = maxCount * docSize;
assert.commandWorked(db.runCommand(
{dbCheck: multiBatchSimpleCollName, minKey: start, maxKey: end, maxSize: maxSize}));
awaitDbCheckCompletion(db, multiBatchSimpleCollName, end, maxSize);
checkEntryBounds(start, start + maxCount);
// The remaining tests only run on debug builds because they rely on dbCheck health-logging
// all info-level batch results.
const debugBuild = primary.getDB('admin').adminCommand('buildInfo').debug;
if (!debugBuild) {
return;
}
const healthlog = db.getSiblingDB('local').system.healthlog;
{
// Validate custom maxDocsPerBatch
clearLog();
const maxDocsPerBatch = 100;
assert.commandWorked(
db.runCommand({dbCheck: multiBatchSimpleCollName, maxDocsPerBatch: maxDocsPerBatch}));
const healthlog = db.getSiblingDB('local').system.healthlog;
assert.soon(function() {
const expectedBatches = multiBatchSimpleCollSize / maxDocsPerBatch +
(multiBatchSimpleCollSize % maxDocsPerBatch ? 1 : 0);
return (healthlog.find({"operation": "dbCheckBatch"}).itcount() == expectedBatches);
}, "dbCheck doesn't seem to complete", 60 * 1000);
assert.eq(
db.getSiblingDB('local')
.system.healthlog.find({"operation": "dbCheckBatch", "data.count": maxDocsPerBatch})
.itcount(),
multiBatchSimpleCollSize / maxDocsPerBatch);
}
{
// Validate custom maxBytesPerBatch
clearLog();
const coll = db.getSiblingDB("maxBytesPerBatch").maxBytesPerBatch;
// Insert nDocs, each of which being slightly larger than 1MB, and then run dbCheck with
// maxBytesPerBatch := 1MB
const nDocs = 5;
coll.insertMany([...Array(nDocs).keys()].map(x => ({a: 'a'.repeat(1024 * 1024)})));
const maxBytesPerBatch = 1024 * 1024;
assert.commandWorked(db.getSiblingDB("maxBytesPerBatch").runCommand({
dbCheck: coll.getName(),
maxBytesPerBatch: maxBytesPerBatch
}));
// Confirm dbCheck logs nDocs batches.
assert.soon(function() {
return (healthlog.find({"operation": "dbCheckBatch"}).itcount() == nDocs);
}, "dbCheck doesn't seem to complete", 60 * 1000);
assert.eq(db.getSiblingDB('local')
.system.healthlog.find({"operation": "dbCheckBatch", "data.count": 1})
.itcount(),
nDocs);
}
}
testDbCheckParameters();
// Now, test some unusual cases where the command should fail.
function testErrorOnNonexistent() {
let primary = replSet.getPrimary();
let db = primary.getDB("this-probably-doesnt-exist");
assert.commandFailed(db.runCommand({dbCheck: 1}),
"dbCheck spuriously succeeded on nonexistent database");
db = primary.getDB(dbName);
assert.commandFailed(db.runCommand({dbCheck: "this-also-probably-doesnt-exist"}),
"dbCheck spuriously succeeded on nonexistent collection");
}
function testErrorOnSecondary() {
let secondary = replSet.getSecondary();
let db = secondary.getDB(dbName);
assert.commandFailed(db.runCommand({dbCheck: collName}));
}
function testErrorOnUnreplicated() {
let primary = replSet.getPrimary();
let db = primary.getDB("local");
assert.commandFailed(db.runCommand({dbCheck: "oplog.rs"}),
"dbCheck spuriously succeeded on oplog");
assert.commandFailed(primary.getDB(dbName).runCommand({dbCheck: "system.profile"}),
"dbCheck spuriously succeeded on system.profile");
}
testErrorOnNonexistent();
testErrorOnSecondary();
testErrorOnUnreplicated();
// Test stepdown.
function testSucceedsOnStepdown() {
let primary = replSet.getPrimary();
let db = primary.getDB(dbName);
let nodeId = replSet.getNodeId(primary);
assert.commandWorked(db.runCommand({dbCheck: multiBatchSimpleCollName}));
// Step down the primary.
assert.commandWorked(primary.getDB("admin").runCommand({replSetStepDown: 0, force: true}));
// Wait for the cluster to come up.
replSet.awaitSecondaryNodes();
// Find the node we ran dbCheck on.
db = replSet.getSecondaries()
.filter(function isPreviousPrimary(node) {
return replSet.getNodeId(node) === nodeId;
})[0]
.getDB(dbName);
// Check that it's still responding.
try {
assert.commandWorked(db.runCommand({ping: 1}), "ping failed after stepdown during dbCheck");
} catch (e) {
doassert("cannot connect after dbCheck with stepdown");
}
// And that our dbCheck completed.
assert(dbCheckCompleted(db), "dbCheck failed to terminate on stepdown");
}
testSucceedsOnStepdown();
// Temporarily restart the secondary as a standalone, inject an inconsistency and
// restart it back as a secondary.
function injectInconsistencyOnSecondary(cmd) {
const secondaryConn = replSet.getSecondary();
const secondaryNodeId = replSet.getNodeId(secondaryConn);
replSet.stop(secondaryNodeId, {forRestart: true /* preserve dbPath */});
const standaloneConn = MongoRunner.runMongod({
dbpath: secondaryConn.dbpath,
noCleanData: true,
});
const standaloneDB = standaloneConn.getDB(dbName);
assert.commandWorked(standaloneDB.runCommand(cmd));
// Shut down the secondary and restart it as a member of the replica set.
MongoRunner.stopMongod(standaloneConn);
replSet.start(secondaryNodeId, {}, true /*restart*/);
replSet.awaitNodesAgreeOnPrimaryNoAuth();
}
// Just add an extra document, and test that it catches it.
function simpleTestCatchesExtra() {
{
const primary = replSet.getPrimary();
const db = primary.getDB(dbName);
db[collName].drop();
clearLog();
// Create the collection on the primary.
db.createCollection(collName, {validationLevel: "off"});
}
replSet.awaitReplication();
injectInconsistencyOnSecondary({insert: collName, documents: [{}]});
replSet.awaitReplication();
{
const primary = replSet.getPrimary();
const db = primary.getDB(dbName);
assert.commandWorked(db.runCommand({dbCheck: collName}));
awaitDbCheckCompletion(db, collName);
}
assert.soon(function() {
return (replSet.getSecondary()
.getDB("local")
.system.healthlog.find({"operation": "dbCheckStop"})
.itcount() === 1);
}, "dbCheck didn't complete on secondary");
const errors = replSet.getSecondary().getDB("local").system.healthlog.find(
{operation: /dbCheck.*/, severity: "error"});
assert.eq(errors.count(),
1,
"expected exactly 1 inconsistency after single inconsistent insertion, found: " +
JSON.stringify(errors.toArray()));
}
simpleTestCatchesExtra();
replSet.stopSet();
})();