// mongo/jstests/noPassthrough/query/query_oplogreplay.js

// Test oplog queries that can be optimized with oplogReplay.
// @tags: [requires_replication, requires_capped]

import {getPlanStage, getWinningPlanFromExplain} from "jstests/libs/query/analyze_plan.js";
import {ReplSetTest} from "jstests/libs/replsettest.js";

// Bring up a single-node replica set; the queries in this test run against its primary.
const replSet = new ReplSetTest({nodes: 1});
replSet.startSet();
replSet.initiate();

// Handles shared by the rest of the test: the primary connection, the database that receives
// the test inserts, and the 'oplog.rs' collection of the 'local' database.
const conn = replSet.getPrimary();
const testDB = conn.getDB("test");
const localDB = conn.getDB("local");
const oplog = localDB.oplog.rs;

/**
 * Builds a Timestamp whose first argument is always 1000 and whose second argument is 'i'.
 * Since only the second argument varies, i < j guarantees makeTS(i) < makeTS(j).
 */
function makeTS(i) {
    const fixedLeadingPart = 1000;
    return Timestamp(fixedLeadingPart, i);
}

/**
 * Converts a value exposing 'top' and 'bottom' properties into a Timestamp built from those two
 * parts, in that order. This test applies it to the 'minRecord' and 'maxRecord' fields of a
 * COLLSCAN explain stage so that they can be compared against oplog timestamps.
 */
function longToTs(i) {
    const {top, bottom} = i;
    return Timestamp(top, bottom);
}

// Slot 0 holds a dummy element so that timestamps[k] is the opTime timestamp reported for the
// insert of the document with _id k. Keeping index and id aligned avoids off-by-1 errors in the
// assertions below.
const timestamps = [{}];

for (let i = 1; i <= 100; i++) {
    const res = testDB.runCommand({insert: jsTestName(), documents: [{_id: i, ts: makeTS(i)}]});
    // Check the command result before touching 'opTime': reading it from a failed response
    // first would hide the real command error behind a TypeError.
    assert.commandWorked(res);
    timestamps.push(res.opTime.ts);
}

// Namespace of the collection populated above; every oplog query below that targets those
// inserts filters on it through the 'ns' field.
const collNs = `test.${jsTestName()}`;

// A $gt query on just the 'ts' field should skip the entry at the given timestamp and start
// with the one right after it.
let cursor = oplog.find({ns: collNs, ts: {$gt: timestamps[20]}});
for (const expectedId of [21, 22]) {
    assert.eq(expectedId, cursor.next().o._id);
}

// A $gte query on the 'ts' field should start with the entry at the given timestamp.
cursor = oplog.find({ns: collNs, ts: {$gte: timestamps[20]}});
for (const expectedId of [20, 21]) {
    assert.eq(expectedId, cursor.next().o._id);
}

// An $eq query on the 'ts' field should return exactly one entry: the one at that timestamp.
cursor = oplog.find({ns: collNs, ts: {$eq: timestamps[20]}});
assert.eq(20, cursor.next().o._id);
assert(!cursor.hasNext());

// An AND combining $lt and $gt on the 'ts' field returns only the entries strictly between the
// two bounds.
cursor = oplog.find({$and: [{ns: collNs}, {ts: {$lt: timestamps[5]}}, {ts: {$gt: timestamps[1]}}]});
for (const expectedId of [2, 3, 4]) {
    assert.eq(expectedId, cursor.next().o._id);
}
assert(!cursor.hasNext());

// With several overlapping predicates on the 'ts' field, results are limited to the tightest
// range: after timestamps[3] and before timestamps[7].
cursor = oplog.find({
    $and: [
        {ns: collNs},
        {ts: {$gte: timestamps[2]}},
        {ts: {$gt: timestamps[3]}},
        {ts: {$lte: timestamps[7]}},
        {ts: {$lt: timestamps[7]}},
    ],
});
for (const expectedId of [4, 5, 6]) {
    assert.eq(expectedId, cursor.next().o._id);
}
assert(!cursor.hasNext());

// Adding an $eq predicate to a set of range bounds narrows the result to the single entry at
// that timestamp.
cursor = oplog.find({
    $and: [
        {ns: collNs},
        {ts: {$gte: timestamps[1]}},
        {ts: {$gt: timestamps[2]}},
        {ts: {$eq: timestamps[5]}},
        {ts: {$lte: timestamps[8]}},
        {ts: {$lt: timestamps[8]}},
    ],
});
assert.eq(5, cursor.next().o._id);
assert(!cursor.hasNext());

// An $eq query stops scanning after passing the max timestamp.
let res = oplog.find({ns: collNs, ts: {$eq: timestamps[10]}}).explain("executionStats");
assert.commandWorked(res);
// The scan is expected to seek straight to the entry at timestamps[10], so it should examine
// no more than two documents.
assert.lte(res.executionStats.totalDocsExamined, 2, res);
let collScanStage = getPlanStage(getWinningPlanFromExplain(res), "COLLSCAN");
assert.neq(null, collScanStage, `no collection scan found in explain output: ${tojson(res)}`);
assert.eq(timestamps[10], longToTs(collScanStage.maxRecord), res);

// An AND with an $lt predicate, and then one with an $lte predicate, stops scanning after
// passing the max timestamp. Both variants share the same lower bound and expectations.
for (const upperBoundOp of ["$lt", "$lte"]) {
    const boundedFilter = {
        $and: [{ts: {$gte: timestamps[51]}}, {ts: {[upperBoundOp]: timestamps[60]}}],
    };
    res = oplog.find(boundedFilter).explain("executionStats");
    assert.commandWorked(res);

    // Allow at most two examined documents beyond the ones actually returned.
    const {totalDocsExamined, nReturned} = res.executionStats;
    assert.lte(totalDocsExamined, nReturned + 2, res);

    collScanStage = getPlanStage(getWinningPlanFromExplain(res), "COLLSCAN");
    assert.neq(null, collScanStage, `no collection scan found in explain output: ${tojson(res)}`);
    assert.eq(timestamps[60], longToTs(collScanStage.maxRecord), res);
    assert.eq(timestamps[51], longToTs(collScanStage.minRecord), res);
}

// The max timestamp is respected even when the min timestamp is smaller than the lowest
// timestamp in the collection (timestamps[0] is the dummy first element).
const belowMinFilter = {
    $and: [{ns: collNs}, {ts: {$gte: timestamps[0]}}, {ts: {$lte: timestamps[10]}}],
};
res = oplog.find(belowMinFilter).explain("executionStats");
assert.commandWorked(res);
collScanStage = getPlanStage(getWinningPlanFromExplain(res), "COLLSCAN");
assert.neq(null, collScanStage, `no collection scan found in explain output: ${tojson(res)}`);
assert.eq(timestamps[10], longToTs(collScanStage.maxRecord), res);

// An AND with redundant $eq/$lt/$lte predicates stops scanning after passing the max
// timestamp.
const redundantBoundsFilter = {
    $and: [
        {ns: collNs},
        {ts: {$gte: timestamps[0]}},
        {ts: {$lte: timestamps[10]}},
        {ts: {$eq: timestamps[5]}},
        {ts: {$lt: timestamps[20]}},
    ],
};
res = oplog.find(redundantBoundsFilter).explain("executionStats");
assert.commandWorked(res);
// The $eq predicate should win: both scan bounds are expected to sit at timestamps[5].
collScanStage = getPlanStage(getWinningPlanFromExplain(res), "COLLSCAN");
assert.neq(null, collScanStage, `no collection scan found in explain output: ${tojson(res)}`);
assert.eq(timestamps[5], longToTs(collScanStage.maxRecord), res);
assert.eq(timestamps[5], longToTs(collScanStage.minRecord), res);

// An $eq query for a timestamp that no inserted document carries: the scan's upper bound is
// still expected to be the requested timestamp.
const missingTs = makeTS(200);
res = oplog.find({ns: collNs, ts: {$eq: missingTs}}).explain("executionStats");
assert.commandWorked(res);
collScanStage = getPlanStage(getWinningPlanFromExplain(res), "COLLSCAN");
assert.neq(null, collScanStage, `no collection scan found in explain output: ${tojson(res)}`);
assert.eq(makeTS(200), longToTs(collScanStage.maxRecord), res);

// A closed range on 'ts' whose last in-range document matches the filter: the scan's upper
// bound is expected to be the range's $lte timestamp.
const closedRangeFilter = {
    $and: [{ns: collNs}, {ts: {$gte: timestamps[4]}}, {ts: {$lte: timestamps[8]}}],
};
res = oplog.find(closedRangeFilter).explain("executionStats");
assert.commandWorked(res);
collScanStage = getPlanStage(getWinningPlanFromExplain(res), "COLLSCAN");
assert.neq(null, collScanStage, `no collection scan found in explain output: ${tojson(res)}`);
assert.eq(timestamps[8], longToTs(collScanStage.maxRecord), res);

// A filter with only an upper bound predicate on 'ts' stops scanning after
// passing the max timestamp.
const upperBoundOnlyFilter = {ns: collNs, ts: {$lt: timestamps[4]}};
res = oplog.find(upperBoundOnlyFilter).explain("executionStats");
assert.commandWorked(res);
collScanStage = getPlanStage(getWinningPlanFromExplain(res), "COLLSCAN");
assert.neq(null, collScanStage, `no collection scan found in explain output: ${tojson(res)}`);
assert.eq(timestamps[4], longToTs(collScanStage.maxRecord), res);

// Oplog replay optimization should work with projection. The same checks run once with an
// upper bound on 'ts' and once with a lower bound: every returned entry must have 'op'
// projected out while keeping 'ts', and explaining the same query must succeed.
for (const tsPredicate of [{$lte: timestamps[4]}, {$gte: timestamps[90]}]) {
    const projectedCursor = oplog.find({ns: collNs, ts: tsPredicate}).projection({op: 0});
    while (projectedCursor.hasNext()) {
        const entry = projectedCursor.next();
        assert(!entry.hasOwnProperty("op"));
        assert(entry.hasOwnProperty("ts"));
    }
    res = projectedCursor.explain("executionStats");
    assert.commandWorked(res);
}

// Oplog replay optimization should work with limit: with a limit of 2 on a bounded 'ts'
// range, exactly two documents should be examined and returned by the collection scan.
res = oplog
    .find({$and: [{ns: collNs}, {ts: {$gte: timestamps[4]}}, {ts: {$lte: timestamps[8]}}]})
    .limit(2)
    .explain("executionStats");
assert.commandWorked(res);
// Attach the explain output so a mismatch can be diagnosed from the failure message.
assert.eq(2, res.executionStats.totalDocsExamined, res);
collScanStage = getPlanStage(res.executionStats.executionStages, "COLLSCAN");
// Same null check as the other explain assertions in this file: without it, a missing COLLSCAN
// stage would surface as a TypeError on 'nReturned' with no explain output to inspect.
assert.neq(null, collScanStage, "no collection scan found in explain output: " + tojson(res));
assert.eq(2, collScanStage.nReturned, res);

// A query over both 'ts' and '_id' should only pay attention to the 'ts' field for finding
// the oplog start (SERVER-13566). Only the entry for the document with _id 25 should match.
const tsAndIdFilter = {ns: collNs, ts: {$gte: timestamps[20]}, "o._id": 25};
cursor = oplog.find(tsAndIdFilter);
const onlyMatchingEntry = cursor.next();
assert.eq(25, onlyMatchingEntry.o._id);
assert(!cursor.hasNext());

// 'oplogreplay' flag is allowed but ignored on the oplog collection.
const oplogReplayFind = {find: oplog.getName(), oplogReplay: true};
assert.commandWorked(oplog.runCommand(oplogReplayFind));

// 'oplogreplay' flag is allowed but ignored on capped collections. Start from a fresh capped
// collection holding 100 documents with increasing 'ts' values.
const cappedColl = testDB.cappedColl_jstests_query_oplogreplay;
cappedColl.drop();
const cappedOptions = {capped: true, size: 16 * 1024};
assert.commandWorked(testDB.createCollection(cappedColl.getName(), cappedOptions));
for (let docId = 1; docId <= 100; docId++) {
    assert.commandWorked(cappedColl.insert({_id: docId, ts: makeTS(docId)}));
}

// Explain an $eq query for a 'ts' value that was never inserted; all 100 documents are
// expected to be examined despite the flag.
const explainWithFlag = {
    explain: {find: cappedColl.getName(), filter: {ts: {$eq: makeTS(200)}}, oplogReplay: true},
};
res = cappedColl.runCommand(explainWithFlag);
assert.commandWorked(res);
assert.eq(res.executionStats.totalDocsExamined, 100);

// Ensure oplog replay hack does not work for backward scans: sorting by reverse natural order
// with only an upper bound on 'ts' should still examine at least 100 documents.
res = oplog.find({ns: collNs, ts: {$lt: timestamps[4]}}).sort({$natural: -1}).explain("executionStats");
assert.commandWorked(res);
const {totalDocsExamined: backwardDocsExamined} = res.executionStats;
assert.gte(backwardDocsExamined, 100, res);
collScanStage = getPlanStage(getWinningPlanFromExplain(res), "COLLSCAN");
assert.neq(null, collScanStage, `no collection scan found in explain output: ${tojson(res)}`);

// Shut down the replica set started at the beginning of the test.
replSet.stopSet();