mongo/jstests/aggregation/query_limits_test.js

271 lines
9.2 KiB
JavaScript

/**
* Test that larger queries do not fail. This includes larger aggregation pipelines, as well as
* large $match/$project stages, deeply nested paths, and many predicates in an $and/$or.
* The size of these queries was found by trial and error until we reach the BSON size limit.
*
* @tags: [
* # Can't wrap queries in facets without going past max BSON depth.
* do_not_wrap_aggregations_in_facets,
* not_allowed_with_signed_security_token,
* # Can't use multiplanning, as it leads to query serialization that fails because of max BSON
* # size.
* does_not_support_multiplanning_single_solutions
* ]
*/
import {checkCascadesOptimizerEnabled} from "jstests/libs/optimizer_utils.js";
(function() {
"use strict";
// Only run this test for debug=off opt=on without sanitizers active. With any of these activated,
// the stack frames are larger and can more easily stack overflow.
const debugBuild = db.adminCommand("buildInfo").debug;
if (debugBuild || !_optimizationsEnabled() || _isAddressSanitizerActive() ||
_isLeakSanitizerActive() || _isThreadSanitizerActive() ||
_isUndefinedBehaviorSanitizerActive()) {
jsTestLog("Returning early because debug is on, opt is off, or a sanitizer is enabled.");
return;
}
// Some sub-tests below are skipped or behave differently when the Bonsai (Cascades)
// optimizer is enabled; check once up front and reuse.
const isBonsaiEnabled = checkCascadesOptimizerEnabled(db);
// Drop first so repeated runs start from a known-empty collection.
const coll = db.query_limits_test;
coll.drop();
// Multikey so we can't apply any non-multikey optimizations to stress as much as possible.
assert.commandWorked(coll.insert({_id: 0, a: [0, 1], b: [2, 3], c: 4, d: 5, object: {}}));
// Returns the integers [0, high) as an array.
function range(high) {
    return Array.from({length: high}, (_, idx) => idx);
}
// Runs 'pipeline' against the test collection and exhausts the cursor, to make
// sure the query neither fails to parse/plan nor fails during execution. The
// returned documents themselves are irrelevant to this test, so they are not
// kept (the original assigned them to an unused local).
function runAgg(pipeline) {
    coll.aggregate(pipeline).toArray();
}
// Construct a {$match: {a: {$in: [0, 1, 2, ...]}}}.
function testLargeIn() {
    jsTestLog("Testing large $in");
    // The maximum workable $in size is different for ints vs. doubles.
    const intVals = range(1200000).map(v => NumberInt(v));
    runAgg([{$match: {a: {$in: intVals}}}]);
    const doubleVals = range(1000000).map(v => v * 1.0);
    runAgg([{$match: {a: {$in: doubleVals}}}]);
}
// Construct a {$project: {a0: 1, a1: 1, ...}} with ~1M fields, and a $project
// on a single dotted path with ~1M components.
function testLargeProject() {
    jsTestLog("Testing large $project");
    const projectFields = {};
    range(1000000).forEach(function(i) {
        projectFields["a" + i] = NumberInt(1);
    });
    runAgg([{$project: projectFields}]);
    const pathSize = 1000000;
    let nestedProjectField = "a0";
    for (let i = 1; i < pathSize; i++) {
        nestedProjectField += ".a" + i;
    }
    // Use a computed property key so the deeply nested dotted path built above
    // is actually projected; the previous literal key projected a field
    // literally named "nestedProjectField" and never exercised the deep path.
    runAgg([{$project: {[nestedProjectField]: 1}}]);
}
// Run $and and $or with many different types of predicates.
function testLargeAndOrPredicates() {
    // TODO: SERVER-80735 remove this early return once this ticket is done. This is a
    // Bonsai-specific issue with PSR.
    if (isBonsaiEnabled) {
        return;
    }
    jsTestLog("Testing large $and/$or predicates");

    // Large $match of the form {$match: {a0: 1, a1: 1, ...}}
    const wideMatch = {};
    for (const i of range(800000)) {
        wideMatch["a" + i] = NumberInt(1);
    }
    runAgg([{$match: wideMatch}]);

    // Produces [NumberInt(0), ..., NumberInt(n-1)].
    const intStream = (n) => range(n).map(v => NumberInt(v));

    const andOrFilters = [
        // Plain a=i filter.
        intStream(500000).map(i => ({a: i})),
        // a_i = i filter. Different field for each value.
        intStream(500000).map(i => ({["a" + i]: i})),
        // Mix of lt and gt with the same field.
        intStream(500000).map(i => ({a: i % 2 ? {$lt: i} : {$gt: i}})),
        // Mix of lt and gt with different fields.
        intStream(400000).map(i => ({["a" + i]: i % 2 ? {$lt: i} : {$gt: i}})),
        // Mix of lt and gt wrapped in not with different fields.
        intStream(300000).map(i => ({["a" + i]: {$not: i % 2 ? {$lt: i} : {$gt: i}}})),
        // $exists on different fields.
        intStream(400000).map(i => ({["a" + i]: {$exists: true}})),
        intStream(400000).map(i => ({["a" + i]: {$exists: false}}))
    ];
    // Each predicate list is exercised under both $and and $or.
    for (const predicates of andOrFilters) {
        runAgg([{$match: {$and: predicates}}]);
        runAgg([{$match: {$or: predicates}}]);
    }
}
// Test deeply nested queries: 72 levels of $elemMatch wrapped around {a: {$eq: 1}}.
function testDeeplyNestedPath() {
    jsTestLog("Testing deeply nested $match");
    const depth = 72;
    let nestedQuery = {a: {$eq: 1}};
    for (let level = 0; level < depth; level++) {
        nestedQuery = {a: {$elemMatch: nestedQuery}};
    }
    runAgg([{$match: nestedQuery}]);
}
/**
* TODO SERVER-83927: When Bonsai is enabled, Filter -> Sargable conversion is disabled, and
* pipeline optimization is also disabled, we should avoid generating 1000 match expressions. This
* is because we don't call optimize() on the pipeline when the failpoint is enabled; this means
* that instead of a single DocumentSourceMatch with 1000 Match expressions in it, ABT translation
receives 1000 DocumentSourceMatches. It directly translates each one to a FilterNode. Without
* converting them to SargableNodes, they never get merged. This in turn stresses the optimizer,
* which can cause a stack overflow.
*/
// Returns true when the $match case in testPipelineLimits must be skipped:
// Bonsai enabled, Filter -> Sargable conversion disabled, and the
// disablePipelineOptimization failpoint on (see the SERVER-83927 note above).
function skipMatchCase() {
    const sargableConversionDisabled =
        db.adminCommand({getParameter: 1, internalCascadesOptimizerDisableSargableWhenNoIndexes: 1})
            .internalCascadesOptimizerDisableSargableWhenNoIndexes;
    const pipelineOptFailpointMode =
        db.adminCommand({getParameter: 1, "failpoint.disablePipelineOptimization": 1})
            ["failpoint.disablePipelineOptimization"]
            .mode;
    return isBonsaiEnabled && sargableConversionDisabled && pipelineOptFailpointMode;
}
// Test pipeline length: repeat each stage type up to the server's configured
// internalPipelineLengthLimit and make sure the pipeline still runs.
function testPipelineLimits() {
    jsTestLog("Testing large agg pipelines");
    const pipelineLimit =
        assert.commandWorked(db.adminCommand({getParameter: 1, internalPipelineLengthLimit: 1}))
            .internalPipelineLengthLimit;
    const stages = [
        {$limit: 1},
        {$skip: 1},
        {$sort: {a: 1}},
        {$unwind: "$a"},
        {$match: {a: {$mod: [4, 2]}}},
        {$group: {_id: "$a"}},
        {$addFields: {c: {$add: ["$c", "$d"]}}},
        {$addFields: {a: 5}},
        {$project: {a: 1}},
    ];
    if (skipMatchCase()) {
        jsTestLog(
            "Skipping $match case when 'tryBonsai' + pipeline optimization disabled + Filter -> Sargable conversion disabled.");
    } else {
        stages.push({$match: {a: 1}});
    }
    for (const stage of stages) {
        jsTestLog(stage);
        runAgg(range(pipelineLimit).map(() => stage));
    }
}
/*
* Generates a $match query with specified branchingFactor and maxDepth of the form
* {$and: [{$or: [... $and ...]}, ... (length branchingFactor) ...]}
* Uses unique field names across the generated query.
*/
// Counter giving every generated leaf predicate a unique field name.
let fieldIndex = 0;
function generateNestedAndOrHelper(type, branchingFactor, maxDepth) {
    // Base case: a unique {a<k>: NumberInt(<k>)} equality predicate.
    if (maxDepth === 0) {
        const leaf = {["a" + fieldIndex]: NumberInt(fieldIndex)};
        fieldIndex++;
        return leaf;
    }
    // Alternate between $and and $or at successive nesting levels.
    const childType = type === '$and' ? '$or' : '$and';
    const children = [];
    for (let child = 0; child < branchingFactor; child++) {
        children.push(generateNestedAndOrHelper(childType, branchingFactor, maxDepth - 1));
    }
    return {[type]: children};
}
// Builds a fresh nested $and/$or query, restarting the unique-field numbering
// so each generated query is independent of previous ones.
function generateNestedAndOr(type, branchingFactor, maxDepth) {
    fieldIndex = 0;
    const query = generateNestedAndOrHelper(type, branchingFactor, maxDepth);
    return query;
}
// Test nested $and/$or in two shapes: deep and narrow, then shallow and wide.
function testNestedAndOr() {
    jsTestLog("Testing nested $and/$or");
    // Each entry is [branchingFactor, maxDepth].
    const shapes = [[3, 10], [10, 5]];
    for (const topLevelType of ['$and', '$or']) {
        for (const [branchingFactor, maxDepth] of shapes) {
            const query = generateNestedAndOr(topLevelType, branchingFactor, maxDepth);
            runAgg([{$match: query}]);
        }
    }
}
// Test a $setIntersection over a very large number of field-path operands
// ($a1 ... $a13000), followed by a $group on the result.
function testLargeSetFunction() {
    jsTestLog("Testing large $setIntersection");
    const operandCount = 13000;
    const fieldExprs = [];
    for (let idx = 1; idx <= operandCount; idx++) {
        fieldExprs.push("$a" + idx);
    }
    runAgg([{$project: {a: {$setIntersection: fieldExprs}}}, {$group: {_id: "$a"}}]);
}
const tests = [
testLargeIn,
testLargeProject,
testLargeAndOrPredicates,
testDeeplyNestedPath,
testNestedAndOr,
testPipelineLimits,
testLargeSetFunction
];
for (const test of tests) {
test();
}
})();