/**
 * Tests multi writes when a migration occurs in between a yield and a resume.
 *
 * @tags: [
 *   requires_sharding,
 *   assumes_balancer_off,
 *   requires_fcv_82,
 * ]
 */

import {configureFailPoint} from "jstests/libs/fail_point_util.js";
import {funWithArgs} from "jstests/libs/parallel_shell_helpers.js";
import {ShardingTest} from "jstests/libs/shardingtest.js";

// Define the database and collection names.
const kDbName = "test_db";
const kCollName = "test_coll";
const kNs = kDbName + "." + kCollName;
const kNumDocs = 100;
const kDataChunkSplit = kNumDocs / 2;
const kEmptyChunk = -10;
const kDataChunk1 = kDataChunkSplit - 10;
const kDataChunk2 = kDataChunkSplit + 10;

// Configure 'internalQueryExecYieldIterations' on all shards so that operations yield after a
// small number (11) of PlanExecutor iterations.
const st = new ShardingTest({
    shards: 3,
    rs: {setParameter: {internalQueryExecYieldIterations: 11}},
    other: {enableBalancer: false},
});
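
// Three shards are used: shard0 and shard1 hold the data chunks, while shard2 only receives the
// empty chunk that is moved mid-yield to bump the shard version.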

/*
 * Sets up the data distribution for testing migration conflicts:
 * shard0: [MinKey, -1), [-1, 50)
 * shard1: [50, MaxKey)
 * shard2: (empty)
 * On shard0 the first chunk is always empty. That chunk will be moved and used to bump the shard
 * version.
 */
function setupDataDistribution() {
    const mongos = st.s;
    const db = mongos.getDB(kDbName);
    const coll = db.getCollection(kCollName);

    db.dropDatabase();

    // Set shard0 as the primary shard.
    assert.commandWorked(mongos.adminCommand({enableSharding: kDbName, primaryShard: st.shard0.shardName}));

    assert.commandWorked(mongos.adminCommand({shardCollection: kNs, key: {x: 1}}));

    // Split the collection into three chunks: an always-empty chunk below -1 and two data chunks.
    assert.commandWorked(mongos.adminCommand({split: kNs, middle: {x: kDataChunkSplit}}));
    assert.commandWorked(mongos.adminCommand({split: kNs, middle: {x: -1}}));

    assert.commandWorked(
        mongos.adminCommand({moveChunk: kNs, find: {x: kDataChunk2}, to: st.shard1.shardName, _waitForDelete: true}),
    );

    // Insert kNumDocs documents, evenly distributed across the two data chunks.
    let bulk = coll.initializeUnorderedBulkOp();
    for (let i = 0; i < kNumDocs; i++) {
        bulk.insert({x: i, value: "test value " + i});
    }
    assert.commandWorked(bulk.execute());

    // Verify the distribution.
    assert.eq(kNumDocs, coll.countDocuments({}));
    jsTest.log(
        `Data distribution setup complete: ${kNumDocs} documents, ${kNumDocs / 2} on shard0, ${kNumDocs / 2} on shard1`,
    );

    // Ensure the router has the latest routing info.
    coll.find({}).itcount();
}

// Configure a fail point to hang on yield. While the operation is hanging, we can perform a
// migration. On resume, the shard will find a new shard version and throw StaleConfig. For a
// multi write specifically, we want that StaleConfig either to result in a QueryPlanKilled error
// or to be absorbed silently (the write succeeds), depending on the cluster configuration.
function runTest(st, testCaseFun) {
    // Cleanup - reset data distribution
    setupDataDistribution();

    const mongos = st.s;
    const db = mongos.getDB(kDbName);
    const coll = db.getCollection(kCollName);
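
    // The 'setYieldAllLocksHang' fail point makes operations on this namespace hang whenever they
    // yield their locks, keeping the multi write suspended until the fail point is turned off.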
    const fpShard0UpdateHang = configureFailPoint(st.rs0.getPrimary(), "setYieldAllLocksHang", {
        namespace: coll.getFullName(),
    });

    const writeShell = startParallelShell(
        funWithArgs(testCaseFun, kDbName, kCollName, kDataChunk1, kDataChunk2),
        mongos.port,
    );

    // Wait for the fail point to be hit.
    jsTest.log("Waiting for operation to yield");
    fpShard0UpdateHang.wait();
    jsTest.log("Operation yielded, running migration...");

    // Migrate the empty chunk from shard0 to shard2 to bump the shard version.
    jsTest.log("Starting migration.");
    assert.commandWorked(mongos.adminCommand({moveChunk: kNs, find: {x: kEmptyChunk}, to: st.shard2.shardName}));

    jsTest.log("Migration complete, resuming operation");
    fpShard0UpdateHang.off();
    jsTest.log("Waiting for operation to complete");

    writeShell();
    jsTest.log("Operation completed");
}

jsTest.log("updateMany multi:true with concurrent migration should fail if targets 1 shard");
|
|
{
|
|
// The multi write targets 1 shard. We expect the operation to use a valid ShardVersion and
|
|
// throw StaleConfig. However, since we can't safely retry updateMany, the operation will fail
|
|
// with QueryPlanKilled instead.
|
|
let testCase = function (dbName, collName, chunk1, chunk2) {
|
|
assert.throwsWithCode(() => {
|
|
db.getSiblingDB(dbName)[collName].updateMany(
|
|
{x: {$lt: chunk1}}, // Target only shard0
|
|
{$set: {updated: true}},
|
|
);
|
|
}, ErrorCodes.QueryPlanKilled);
|
|
};
|
|
runTest(st, testCase);
|
|
}
|
|
|
|
jsTest.log("updateMany multi:true with concurrent migration should succeed if targets N shards");
|
|
{
|
|
// The multi write targets multiple shards. We expect the operation to use ShardVersion::IGNORED
|
|
// and always succeed indipendently that a placement change happened during the execution.
|
|
let testCase = function (dbName, collName, chunk1, chunk2) {
|
|
assert.commandWorked(
|
|
db.getSiblingDB(dbName)[collName].updateMany(
|
|
{x: {$gt: -1}}, // Targets all documents
|
|
{$set: {updated: true}},
|
|
),
|
|
);
|
|
};
|
|
runTest(st, testCase);
|
|
}
|
|
|
|
jsTest.log("deleteMany multi:true with concurrent migration succeed if targets 1 shard");
|
|
{
|
|
// The deleteMany targets 1 shard. Like updateMany, we expect the operation to use a valid
|
|
// ShardVersion and throw StaleConfig, resulting in QueryPlanKilled error.
|
|
let testCase = function (dbName, collName, chunk1, chunk2) {
|
|
assert.commandWorked(db.getSiblingDB(dbName)[collName].deleteMany({x: {$lt: chunk1}})); // Targets all documents
|
|
};
|
|
runTest(st, testCase);
|
|
}
|
|
|
|
jsTest.log("deleteMany multi:true with concurrent migration should succeed if targets N shards");
|
|
{
|
|
// The deleteMany targets multiple shards. We expect the operation to use ShardVersion::IGNORED
|
|
// and succeed despite the placement change during execution.
|
|
let testCase = function (dbName, collName, chunk1, chunk2) {
|
|
assert.commandWorked(db.getSiblingDB(dbName)[collName].deleteMany({x: {$gt: -1}})); // Targets all documents
|
|
};
|
|
runTest(st, testCase);
|
|
}
|
|
|
|
// Enable the cluster parameter that changes how distributed multi-writes target multiple shards.
jsTest.log("Enabling onlyTargetDataOwningShardsForMultiWrites");
assert.commandWorked(
    st.s.adminCommand({setClusterParameter: {onlyTargetDataOwningShardsForMultiWrites: {enabled: true}}}),
);
assert.commandWorked(st.s.adminCommand({getClusterParameter: "onlyTargetDataOwningShardsForMultiWrites"}));

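// With the parameter enabled, the router is expected to target only the data-owning shards with
// exact shard versions instead of ShardVersion::IGNORED, which changes how multi-shard multi
// writes behave under a concurrent migration, as the following two cases exercise.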
jsTest.log(
    "updateMany with concurrent migration should fail with QueryPlanKilled when onlyTargetDataOwningShardsForMultiWrites is enabled",
);
{
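    // The multi write targets all shards, but with onlyTargetDataOwningShardsForMultiWrites enabled
    // it no longer runs with ShardVersion::IGNORED, so the placement change during the yield is
    // expected to surface as a QueryPlanKilled error.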
    let testCase = function (dbName, collName, chunk1, chunk2) {
        assert.throwsWithCode(() => {
            assert.commandWorked(
                db.getSiblingDB(dbName)[collName].updateMany(
                    {x: {$gt: -1}}, // Targets all documents
                    {$set: {updated: true}},
                ),
            );
        }, ErrorCodes.QueryPlanKilled);
    };
    runTest(st, testCase);
}

jsTest.log(
    "deleteMany with concurrent migration should still return ok when onlyTargetDataOwningShardsForMultiWrites is enabled",
);
{
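    // Unlike updateMany, the deleteMany can be retried safely after the placement change, so it is
    // expected to succeed even with the parameter enabled.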
    let testCase = function (dbName, collName, chunk1, chunk2) {
        assert.commandWorked(db.getSiblingDB(dbName)[collName].deleteMany({}));
    };
    runTest(st, testCase);
}

st.stop();