mongo/jstests/sharding/recovering_secondaryok.js

132 lines
3.8 KiB
JavaScript

/**
 * Tests that secondaryOk queries in a sharded cluster are still routed correctly, and do not
 * fail, when a shard secondary goes into the RECOVERING state.
 */
// Shard secondaries are restarted by this test, which may cause that shard's primary to step down
// while it cannot see its secondaries. As a result, either the primary connection gets reset or
// the primary could change.
// NOTE(review): the flag below presumably tells the test harness to skip its check that collection
// UUIDs are consistent across the cluster; confirm against the code that reads it.
TestData.skipCheckingUUIDsConsistentAcrossCluster = true;
import {awaitRSClientHosts} from "jstests/replsets/rslib.js";
import {ReplSetTest} from "jstests/libs/replsettest.js";
import {ShardingTest} from "jstests/libs/shardingtest.js";
// Start a cluster with two shards and two mongos routers; `other: {rs: true}` asks for
// replica-set shards.
const shardTest = new ShardingTest({
    name: "recovering_secondaryok",
    shards: 2,
    mongos: 2,
    other: {rs: true},
});

// The first router issues ordinary queries; the second one is flagged secondaryOk.
const regularRouter = shardTest.s0;
const secondaryOkRouter = shardTest.s1;
secondaryOkRouter.setSecondaryOk();

// The same namespace, reached once through each router.
const coll = regularRouter.getDB("test").getCollection("foo");
const collSOk = secondaryOkRouter.getCollection(String(coll));

// These stay `let`: a later step may swap them so that rsA backs the shard under test.
let rsA = shardTest.rs0;
let rsB = shardTest.rs1;

// Write one document directly to each shard's primary, then wait until both sets have replicated.
for (const [replSet, scratchDbName] of [
    [rsA, "test_a"],
    [rsB, "test_b"],
]) {
    assert.commandWorked(replSet.getPrimary().getDB(scratchDbName).dummy.insert({x: 1}));
}
for (const replSet of [rsA, rsB]) {
    replSet.awaitReplication();
}
print("1: initial insert");
// Two documents, one on each side of the {_id: 0} split point used below.
for (const seedFields of [
    {_id: -1, a: "a"},
    {_id: 1, b: "b"},
]) {
    assert.commandWorked(coll.save({...seedFields, date: new Date()}));
}

print("2: shard collection");
const shardKeyPattern = {_id: 1};
const splitPoint = {_id: 0};
const chunkToMove = {_id: 0};
shardTest.shardColl(
    coll,
    shardKeyPattern,
    splitPoint,
    chunkToMove,
    null /* dbname */,
    true /* waitForDelete */,
);
print("3: test normal and secondaryOk queries");

// Make shardA and rsA the same: shardA is the shard that owns {_id: -1}. If that shard's name
// matches rsB's URL, swap the two handles so rsA always refers to the set backing shardA.
const shardA = shardTest.getShard(coll, {_id: -1});
const shardAColl = shardA.getCollection(String(coll));
if (shardA.name === rsB.getURL()) {
    [rsA, rsB] = [rsB, rsA];
}
rsA.awaitReplication();
rsB.awaitReplication();

// Because of async migration cleanup, the two routers may briefly disagree on the document count,
// so poll until they match. The message makes a timeout here self-explanatory in the test log.
assert.soon(
    () => coll.find().itcount() === collSOk.find().itcount(),
    "regular and secondaryOk queries never returned the same number of documents",
);

// Queried directly, shardA must hold exactly the {_id: -1} document.
assert.eq(shardAColl.find().itcount(), 1);
assert.eq(shardAColl.findOne()._id, -1);
print("5: make one of the secondaries RECOVERING");
// Take the first two secondaries of rsA: the first is stopped in step 6, the second is put into
// maintenance mode here and awaited until it reports RECOVERING.
const [secondaryToStop, secondaryInMaintenance] = rsA.getSecondaries();
assert.commandWorked(secondaryInMaintenance.adminCommand("replSetMaintenance"));
rsA.waitForState(secondaryInMaintenance, ReplSetTest.State.RECOVERING);

print("6: stop non-RECOVERING secondary");
rsA.stop(secondaryToStop);

print("7: check our regular and secondaryOk query");
// Both routers must still return both documents while one of rsA's secondaries is RECOVERING and
// another is stopped.
for (const routedColl of [coll, collSOk]) {
    assert.eq(2, routedColl.find().itcount());
}
print("8: restart both our secondaries clean");
// The same five-minute budget is used for each restart and for the recovery wait.
const fiveMinutesMs = 5 * 60 * 1000;
for (const node of rsA.getSecondaries()) {
    rsA.restart(node, {remember: true, startClean: true}, undefined, fiveMinutesMs);
}

print("9: wait for recovery");
rsA.awaitSecondaryNodes(fiveMinutesMs);

print("10: check our regular and secondaryOk query");
// The regular router has to consider every node of both sets accessible, otherwise the queries
// below fail.
// See SERVER-7274
for (const replSet of [rsA, rsB]) {
    awaitRSClientHosts(coll.getMongo(), replSet.nodes, {ok: true});
}
// The secondaryOk router additionally needs at least one secondary per set to be accessible,
// otherwise the queries below fail.
// See SERVER-7699
for (const replSet of [rsA, rsB]) {
    const [firstSecondary] = replSet.getSecondaries();
    awaitRSClientHosts(collSOk.getMongo(), [firstSecondary], {secondary: true, ok: true});
}
print("SecondaryOk Query...");
const secondaryOkDocCount = collSOk.find().itcount();

// Counts the documents visible through the regular (non-secondaryOk) router.
const countThroughRegularRouter = () => coll.find().itcount();

let regularDocCount = null;
try {
    print("Normal query...");
    regularDocCount = countThroughRegularRouter();
} catch (err) {
    printjson(err);
    // Step 8 may have triggered a stepdown, in which case the first attempt can error out.
    // Retry exactly once; a second failure propagates and fails the test.
    print("Error may have been caused by stepdown, try again.");
    regularDocCount = countThroughRegularRouter();
}

// Both routers must agree on how many documents the collection holds.
assert.eq(regularDocCount, secondaryOkDocCount);

shardTest.stop();