// mongo/jstests/concurrency/fsm_libs/runner.js

load('jstests/concurrency/fsm_libs/assert.js');
import {Cluster} from "jstests/concurrency/fsm_libs/cluster.js";
load('jstests/concurrency/fsm_libs/parse_config.js');
import {ThreadManager} from "jstests/concurrency/fsm_libs/thread_mgr.js";
load('jstests/concurrency/fsm_utils/name_utils.js'); // for uniqueCollName and uniqueDBName
export const runner = (function() {
    function validateExecutionMode(mode) {
        var allowedKeys = ['composed', 'parallel', 'serial'];

        Object.keys(mode).forEach(function(option) {
            assert.contains(option,
                            allowedKeys,
                            'invalid option: ' + tojson(option) +
                                '; valid options are: ' + tojson(allowedKeys));
        });

        mode.composed = mode.composed || false;
        assert.eq('boolean', typeof mode.composed);

        mode.parallel = mode.parallel || false;
        assert.eq('boolean', typeof mode.parallel);

        mode.serial = mode.serial || false;
        assert.eq('boolean', typeof mode.serial);

        var numEnabledModes = 0;
        Object.keys(mode).forEach(key => {
            if (mode[key]) {
                numEnabledModes++;
            }
        });
        assert.eq(
            1, numEnabledModes, "One and only one execution mode can be enabled " + tojson(mode));

        return mode;
    }
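    // e.g., validateExecutionMode({serial: true}) passes validation and returns
    // {serial: true, composed: false, parallel: false}.
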
    function validateExecutionOptions(mode, options) {
        var allowedKeys = [
            'dbNamePrefix',
            'iterationMultiplier',
            'sessionOptions',
            'actionFiles',
            'threadMultiplier'
        ];

        if (mode.parallel || mode.composed) {
            allowedKeys.push('numSubsets');
            allowedKeys.push('subsetSize');
        }
        if (mode.composed) {
            allowedKeys.push('composeProb');
            allowedKeys.push('iterations');
        }

        Object.keys(options).forEach(function(option) {
            assert.contains(option,
                            allowedKeys,
                            'invalid option: ' + tojson(option) +
                                '; valid options are: ' + tojson(allowedKeys));
        });

        if (typeof options.subsetSize !== 'undefined') {
            assert(Number.isInteger(options.subsetSize), 'expected subset size to be an integer');
            assert.gt(options.subsetSize, 1);
        }

        if (typeof options.numSubsets !== 'undefined') {
            assert(Number.isInteger(options.numSubsets),
                   'expected number of subsets to be an integer');
            assert.gt(options.numSubsets, 0);
        }

        if (typeof options.iterations !== 'undefined') {
            assert(Number.isInteger(options.iterations),
                   'expected number of iterations to be an integer');
            assert.gt(options.iterations, 0);
        }

        if (typeof options.composeProb !== 'undefined') {
            assert.eq('number', typeof options.composeProb);
            assert.gt(options.composeProb, 0);
            assert.lte(options.composeProb, 1);
        }

        if (typeof options.dbNamePrefix !== 'undefined') {
            assert.eq(
                'string', typeof options.dbNamePrefix, 'expected dbNamePrefix to be a string');
        }

        options.iterationMultiplier = options.iterationMultiplier || 1;
        assert(Number.isInteger(options.iterationMultiplier),
               'expected iterationMultiplier to be an integer');
        assert.gte(options.iterationMultiplier,
                   1,
                   'expected iterationMultiplier to be greater than or equal to 1');

        if (typeof options.actionFiles !== 'undefined') {
            assert.eq('string',
                      typeof options.actionFiles.permitted,
                      'expected actionFiles.permitted to be a string');
            assert.eq('string',
                      typeof options.actionFiles.idleRequest,
                      'expected actionFiles.idleRequest to be a string');
            assert.eq('string',
                      typeof options.actionFiles.idleAck,
                      'expected actionFiles.idleAck to be a string');
        }

        options.threadMultiplier = options.threadMultiplier || 1;
        assert(Number.isInteger(options.threadMultiplier),
               'expected threadMultiplier to be an integer');
        assert.gte(options.threadMultiplier,
                   1,
                   'expected threadMultiplier to be greater than or equal to 1');

        return options;
    }
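    // e.g., validateExecutionOptions({serial: true}, {}) fills in the defaults and
    // returns {iterationMultiplier: 1, threadMultiplier: 1}.
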
    function validateCleanupOptions(options) {
        var allowedKeys = ['dropDatabaseDenylist', 'keepExistingDatabases', 'validateCollections'];

        Object.keys(options).forEach(function(option) {
            assert.contains(option,
                            allowedKeys,
                            'invalid option: ' + tojson(option) +
                                '; valid options are: ' + tojson(allowedKeys));
        });

        if (typeof options.dropDatabaseDenylist !== 'undefined') {
            assert(Array.isArray(options.dropDatabaseDenylist),
                   'expected dropDatabaseDenylist to be an array');
        }

        if (typeof options.keepExistingDatabases !== 'undefined') {
            assert.eq('boolean',
                      typeof options.keepExistingDatabases,
                      'expected keepExistingDatabases to be a boolean');
        }

        options.validateCollections =
            options.hasOwnProperty('validateCollections') ? options.validateCollections : true;
        assert.eq('boolean',
                  typeof options.validateCollections,
                  'expected validateCollections to be a boolean');

        return options;
    }
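    // e.g., validateCleanupOptions({}) returns {validateCollections: true}, since
    // collection validation defaults to on.
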
    /**
     * Returns an array containing sets of workloads.
     * Each set of workloads is executed together according to the execution mode.
     *
     * For example, returning [ [ workload1, workload2 ], [ workload2, workload3 ] ]
     * when 'executionMode.parallel == true' causes workloads #1 and #2 to be
     * executed simultaneously, followed by workloads #2 and #3 together.
     */
    function scheduleWorkloads(workloads, executionMode, executionOptions) {
        if (executionMode.serial) {
            return Array.shuffle(workloads).map(function(workload) {
                return [workload];  // run each workload by itself
            });
        }

        var schedule = [];

        // Take 'numSubsets' random subsets of the workloads, each
        // of size 'subsetSize'. Each workload must get scheduled
        // once before any workload can be scheduled again.
        var subsetSize = executionOptions.subsetSize || 10;

        // If the number of subsets is not specified, then have each
        // workload get scheduled 2 to 3 times.
        var numSubsets = executionOptions.numSubsets;
        if (!numSubsets) {
            numSubsets = Math.ceil(2.5 * workloads.length / subsetSize);
        }
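        // e.g., with 25 workloads and the default subsetSize of 10, numSubsets is
        // Math.ceil(2.5 * 25 / 10) = 7, so each workload lands in roughly 2-3 subsets.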

        workloads = workloads.slice(0);  // copy
        workloads = Array.shuffle(workloads);

        var start = 0;
        var end = subsetSize;

        while (schedule.length < numSubsets) {
            schedule.push(workloads.slice(start, end));

            start = end;
            end += subsetSize;

            // Check if there are not enough elements remaining in
            // 'workloads' to make a subset of size 'subsetSize'.
            if (end > workloads.length) {
                // Re-shuffle the beginning of the array, and prepend it
                // with the workloads that have not been scheduled yet.
                var temp = Array.shuffle(workloads.slice(0, start));
                for (var i = workloads.length - 1; i >= start; --i) {
                    temp.unshift(workloads[i]);
                }

                workloads = temp;
                start = 0;
                end = subsetSize;
            }
        }

        return schedule;
    }

    function prepareCollections(workloads, context, cluster, clusterOptions, executionOptions) {
        var dbName, collName, myDB;
        var firstWorkload = true;

        workloads.forEach(function(workload) {
            // Workloads cannot have a shardKey if sameCollection is specified.
            if (clusterOptions.sameCollection && cluster.isSharded() &&
                context[workload].config.data.shardKey) {
                throw new Error('cannot specify a shardKey with sameCollection option');
            }
            if (firstWorkload || !clusterOptions.sameCollection) {
                if (firstWorkload || !clusterOptions.sameDB) {
                    dbName = uniqueDBName(executionOptions.dbNamePrefix);
                }
                collName = uniqueCollName();

                myDB = cluster.getDB(dbName);
                myDB[collName].drop();

                if (cluster.isSharded()) {
                    var shardKey = context[workload].config.data.shardKey || {_id: 'hashed'};
                    // TODO: allow workload config data to specify split
                    cluster.shardCollection(myDB[collName], shardKey, false);
                }
            }

            context[workload].db = myDB;
            context[workload].dbName = dbName;
            context[workload].collName = collName;

            firstWorkload = false;
        });
    }

    function dropAllDatabases(db, denylist) {
        var res = db.adminCommand('listDatabases');
        assert.commandWorked(res);

        res.databases.forEach(function(dbInfo) {
            if (!Array.contains(denylist, dbInfo.name)) {
                assert.commandWorked(db.getSiblingDB(dbInfo.name).dropDatabase());
            }
        });
    }

    function cleanupWorkloadData(workloads, context, clusterOptions) {
        // If no other workloads will be using this collection,
        // then drop it to avoid having too many files open.
        if (!clusterOptions.sameCollection) {
            workloads.forEach(function(workload) {
                var config = context[workload];
                config.db[config.collName].drop();
            });
        }

        // If no other workloads will be using this database,
        // then drop it to avoid having too many files open.
        if (!clusterOptions.sameDB) {
            workloads.forEach(function(workload) {
                var config = context[workload];
                config.db.dropDatabase();
            });
        }
    }

    function WorkloadFailure(err, stack, tid, header) {
        this.err = err;
        this.stack = stack;
        this.tid = tid;
        this.header = header;

        this.format = function format() {
            return this.header + '\n' + this.err + '\n\n' + this.stack;
        };
    }

    function throwError(workerErrs) {
        // Returns an array containing all unique values from the stackTraces array,
        // their corresponding number of occurrences in the stackTraces array, and
        // the associated thread ids (tids).
        function freqCount(stackTraces, tids) {
            var uniqueStackTraces = [];
            var associatedTids = [];

            stackTraces.forEach(function(item, stackTraceIndex) {
                var i = uniqueStackTraces.indexOf(item);
                if (i < 0) {
                    uniqueStackTraces.push(item);
                    associatedTids.push(new Set([tids[stackTraceIndex]]));
                } else {
                    associatedTids[i].add(tids[stackTraceIndex]);
                }
            });

            return uniqueStackTraces.map(function(value, i) {
                return {
                    value: value,
                    freq: associatedTids[i].size,
                    tids: Array.from(associatedTids[i])
                };
            });
        }

        // Indents a multiline string with the specified number of spaces.
        function indent(str, size) {
            var prefix = new Array(size + 1).join(' ');
            return prefix + str.split('\n').join('\n' + prefix);
        }
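        // e.g., indent('a\nb', 4) === '    a\n    b'.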

        function pluralize(str, num) {
            var suffix = num > 1 ? 's' : '';
            return num + ' ' + str + suffix;
        }
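        // e.g., pluralize('thread', 1) === '1 thread'; pluralize('thread', 3) === '3 threads'.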

        function prepareMsg(workerErrs) {
            var stackTraces = workerErrs.map(e => e.format());
            var stackTids = workerErrs.map(e => e.tid);
            var uniqueTraces = freqCount(stackTraces, stackTids);
            var numUniqueTraces = uniqueTraces.length;

            // Special case message when threads all have the same trace.
            if (numUniqueTraces === 1) {
                return pluralize('thread', stackTraces.length) + ' with tids ' +
                    JSON.stringify(stackTids) + ' threw\n\n' + indent(uniqueTraces[0].value, 8);
            }

            var summary = pluralize('exception', stackTraces.length) + ' were thrown, ' +
                numUniqueTraces + ' of which were unique:\n\n';

            return summary +
                uniqueTraces
                    .map(function(obj) {
                        var line = pluralize('thread', obj.freq) + ' with tids ' +
                            JSON.stringify(obj.tids) + ' threw\n';
                        return indent(line + obj.value, 8);
                    })
                    .join('\n\n');
        }

        if (workerErrs.length > 0) {
            var err = new Error(prepareMsg(workerErrs) + '\n');

            // Avoid having any stack traces omitted from the logs.
            var maxLogLine = 10 * 1024;  // 10KB

            // Check if the combined length of the error message and the stack traces
            // exceeds the maximum line-length the shell will log.
            if ((err.message.length + err.stack.length) >= maxLogLine) {
                print(err.message);
                print(err.stack);
                throw new Error('stack traces would have been snipped, see logs');
            }
            throw err;
        }
    }

    function setupWorkload(workload, context, cluster) {
        var myDB = context[workload].db;
        var collName = context[workload].collName;

        var config = context[workload].config;
        config.setup.call(config.data, myDB, collName, cluster);
    }

    function teardownWorkload(workload, context, cluster) {
        var myDB = context[workload].db;
        var collName = context[workload].collName;

        var config = context[workload].config;
        config.teardown.call(config.data, myDB, collName, cluster);
    }

    function setIterations(config) {
        // This property must be enumerable because of SERVER-21338, which prevents
        // objects with non-enumerable properties from being serialized properly in
        // Threads.
        Object.defineProperty(
            config.data, 'iterations', {enumerable: true, value: config.iterations});
    }

    function setThreadCount(config) {
        // This property must be enumerable because of SERVER-21338, which prevents
        // objects with non-enumerable properties from being serialized properly in
        // Threads.
        Object.defineProperty(
            config.data, 'threadCount', {enumerable: true, value: config.threadCount});
    }

    async function loadWorkloadContext(workloads, context, executionOptions, applyMultipliers) {
        for (const workload of workloads) {
            const {$config} = await import(workload);
            assert.neq('undefined', typeof $config, '$config was not defined by ' + workload);
            print(tojson($config));
            context[workload] = {config: parseConfig($config)};
            if (applyMultipliers) {
                context[workload].config.iterations *= executionOptions.iterationMultiplier;
                context[workload].config.threadCount *= executionOptions.threadMultiplier;
            }
        }
    }
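    // Each workload module is expected to export a $config object for parseConfig()
    // to normalize. A minimal sketch of that shape; the fields beyond threadCount and
    // iterations are the conventional FSM workload fields, shown here only for
    // illustration:
    //
    //   export const $config = {
    //       threadCount: 4,
    //       iterations: 20,
    //       data: {},
    //       states: {init: function(db, collName) {}},
    //       transitions: {init: {init: 1}},
    //       setup: function(db, collName, cluster) {},
    //       teardown: function(db, collName, cluster) {},
    //   };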

    function printWorkloadSchedule(schedule) {
        // Print out the entire schedule of workloads to make it easier to run the same
        // schedule when debugging test failures.
        jsTest.log('The entire schedule of FSM workloads:');

        // Note: We use printjsononeline (instead of just plain printjson) to make it
        // easier to reuse the output in variable assignments.
        printjsononeline(schedule);

        jsTest.log('End of schedule');
    }

    function cleanupWorkload(
        workload, context, cluster, errors, header, dbHashDenylist, cleanupOptions) {
        // Returns true if the workload's teardown succeeds and false if the workload's
        // teardown fails.
        var phase = 'before workload ' + workload + ' teardown';

        try {
            // Ensure that all data has replicated correctly to the secondaries before calling the
            // workload's teardown method.
            cluster.checkReplicationConsistency(dbHashDenylist, phase);
        } catch (e) {
            errors.push(new WorkloadFailure(
                e.toString(), e.stack, 'main', header + ' checking consistency on secondaries'));
            return false;
        }

        try {
            if (cleanupOptions.validateCollections) {
                cluster.validateAllCollections(phase);
            }
        } catch (e) {
            errors.push(new WorkloadFailure(
                e.toString(), e.stack, 'main', header + ' validating collections'));
            return false;
        }

        try {
            teardownWorkload(workload, context, cluster);
        } catch (e) {
            errors.push(new WorkloadFailure(e.toString(), e.stack, 'main', header + ' Teardown'));
            return false;
        }
        return true;
    }

    function runWorkloadGroup(threadMgr,
                              workloads,
                              context,
                              cluster,
                              clusterOptions,
                              executionMode,
                              executionOptions,
                              errors,
                              maxAllowedThreads,
                              dbHashDenylist,
                              cleanupOptions) {
        var cleanup = [];
        var teardownFailed = false;
        var startTime = Date.now();  // Initialize in case setupWorkload fails below.
        var totalTime;

        jsTest.log('Workload(s) started: ' + workloads.join(' '));

        prepareCollections(workloads, context, cluster, clusterOptions, executionOptions);

        try {
            // Set up the thread manager for this set of foreground workloads.
            startTime = Date.now();

            threadMgr.init(workloads, context, maxAllowedThreads);

            // Call each foreground workload's setup function.
            workloads.forEach(function(workload) {
                // Define "iterations" and "threadCount" properties on the foreground workload's
                // $config.data object so that they can be used within its setup(), teardown(), and
                // state functions. This must happen after calling threadMgr.init() in case the
                // thread counts needed to be scaled down.
                setIterations(context[workload].config);
                setThreadCount(context[workload].config);

                setupWorkload(workload, context, cluster);
                cleanup.push(workload);
            });

            // Since the worker threads may be running with causal consistency enabled, we set the
            // initial clusterTime and initial operationTime for the sessions they'll create so that
            // they are guaranteed to observe the effects of the workload's $config.setup() function
            // being called.
            if (typeof executionOptions.sessionOptions === 'object' &&
                executionOptions.sessionOptions !== null) {
                // We only start a session for the worker threads and never start one for the main
                // thread. We can therefore get the clusterTime and operationTime tracked by the
                // underlying DummyDriverSession through any DB instance (i.e. the "test" database
                // here was chosen arbitrarily).
                const session = cluster.getDB('test').getSession();

                // JavaScript objects backed by C++ objects (e.g. BSON values from a command
                // response) do not serialize correctly when passed through the Thread
                // constructor. To work around this behavior, we instead pass a stringified form of
                // the JavaScript object through the Thread constructor and use eval() to
                // rehydrate it.
                executionOptions.sessionOptions.initialClusterTime =
                    tojson(session.getClusterTime());
                executionOptions.sessionOptions.initialOperationTime =
                    tojson(session.getOperationTime());
            }

            try {
                // Start this set of foreground workload threads.
                threadMgr.spawnAll(cluster, executionOptions);
                // Allow 20% of foreground threads to fail. This allows the workloads to run on
                // underpowered test hosts.
                threadMgr.checkFailed(0.2);
            } finally {
                // Threads must be joined before destruction, so do this
                // even in the presence of exceptions.
                errors.push(...threadMgr.joinAll().map(
                    e => new WorkloadFailure(
                        e.err, e.stack, e.tid, 'Foreground ' + e.workloads.join(' '))));
            }
        } finally {
            // Call each foreground workload's teardown function. After all teardowns have
            // completed, check whether any of them failed.
            var cleanupResults = cleanup.map(workload => cleanupWorkload(workload,
                                                                         context,
                                                                         cluster,
                                                                         errors,
                                                                         'Foreground',
                                                                         dbHashDenylist,
                                                                         cleanupOptions));
            teardownFailed = cleanupResults.some(success => (success === false));

            totalTime = Date.now() - startTime;
            jsTest.log('Workload(s) completed in ' + totalTime + ' ms: ' + workloads.join(' '));
        }

        // Only drop the collections/databases if all the workloads ran successfully.
        if (!errors.length && !teardownFailed) {
            cleanupWorkloadData(workloads, context, clusterOptions);
        }

        // Throw any existing errors so that the schedule aborts.
        throwError(errors);

        // Ensure that all operations replicated correctly to the secondaries.
        cluster.checkReplicationConsistency(dbHashDenylist,
                                            'after workload-group teardown and data clean-up');
    }

    async function runWorkloads(
        workloads, clusterOptions, executionMode, executionOptions, cleanupOptions) {
        assert.gt(workloads.length, 0, 'need at least one workload to run');

        executionMode = validateExecutionMode(executionMode);
        Object.freeze(executionMode);  // immutable after validation (and normalization)

        validateExecutionOptions(executionMode, executionOptions);
        Object.freeze(executionOptions);  // immutable after validation (and normalization)

        validateCleanupOptions(cleanupOptions);
        Object.freeze(cleanupOptions);  // immutable after validation (and normalization)

        if (executionMode.composed) {
            clusterOptions.sameDB = true;
            clusterOptions.sameCollection = true;
        }

        // Determine how strong to make assertions while simultaneously executing
        // different workloads.
        var assertLevel = AssertLevel.OWN_DB;
        if (clusterOptions.sameDB) {
            // The database is shared by multiple workloads, so only make the asserts
            // that apply when the collection is owned by an individual workload.
            assertLevel = AssertLevel.OWN_COLL;
        }
        if (clusterOptions.sameCollection) {
            // The collection is shared by multiple workloads, so only make the asserts
            // that always apply.
            assertLevel = AssertLevel.ALWAYS;
        }
        globalAssertLevel = assertLevel;

        var context = {};
        await loadWorkloadContext(
            workloads, context, executionOptions, true /* applyMultipliers */);
        var threadMgr = new ThreadManager(clusterOptions, executionMode);

        var cluster = new Cluster(clusterOptions);
        cluster.setup();

        // Clean up the state left behind by other tests in the concurrency suite
        // to avoid having too many open files.

        // List of DBs that will not be dropped.
        var dbDenylist = ['admin', 'config', 'local', '$external'];

        // List of DBs that dbHash is not run on.
        var dbHashDenylist = ['local'];

        if (cleanupOptions.dropDatabaseDenylist) {
            dbDenylist.push(...cleanupOptions.dropDatabaseDenylist);
            dbHashDenylist.push(...cleanupOptions.dropDatabaseDenylist);
        }
        if (!cleanupOptions.keepExistingDatabases) {
            dropAllDatabases(cluster.getDB('test'), dbDenylist);
        }

        var maxAllowedThreads = 100 * executionOptions.threadMultiplier;
        Random.setRandomSeed(clusterOptions.seed);

        var errors = [];

        try {
            var schedule = scheduleWorkloads(workloads, executionMode, executionOptions);
            printWorkloadSchedule(schedule);

            schedule.forEach(function(workloads) {
                // Make a deep copy of the $config object for each of the workloads that are
                // going to be run to ensure the workload starts with a fresh version of its
                // $config.data. This is necessary because $config.data keeps track of
                // thread-local state that may be updated during a workload's setup(),
                // teardown(), and state functions.
                var groupContext = {};
                workloads.forEach(function(workload) {
                    groupContext[workload] = Object.extend({}, context[workload], true);
                });

                // Run the next group of workloads in the schedule.
                runWorkloadGroup(threadMgr,
                                 workloads,
                                 groupContext,
                                 cluster,
                                 clusterOptions,
                                 executionMode,
                                 executionOptions,
                                 errors,
                                 maxAllowedThreads,
                                 dbHashDenylist,
                                 cleanupOptions);
            });

            throwError(errors);
        } finally {
            cluster.teardown();
        }
    }

    return {
        serial: async function serial(workloads, clusterOptions, executionOptions, cleanupOptions) {
            clusterOptions = clusterOptions || {};
            executionOptions = executionOptions || {};
            cleanupOptions = cleanupOptions || {};

            await runWorkloads(
                workloads, clusterOptions, {serial: true}, executionOptions, cleanupOptions);
        },

        parallel:
            async function parallel(workloads, clusterOptions, executionOptions, cleanupOptions) {
                clusterOptions = clusterOptions || {};
                executionOptions = executionOptions || {};
                cleanupOptions = cleanupOptions || {};

                await runWorkloads(
                    workloads, clusterOptions, {parallel: true}, executionOptions, cleanupOptions);
            },

        composed:
            async function composed(workloads, clusterOptions, executionOptions, cleanupOptions) {
                clusterOptions = clusterOptions || {};
                executionOptions = executionOptions || {};
                cleanupOptions = cleanupOptions || {};

                await runWorkloads(
                    workloads, clusterOptions, {composed: true}, executionOptions, cleanupOptions);
            },

        internals: {
            validateExecutionOptions,
            prepareCollections,
            WorkloadFailure,
            throwError,
            setupWorkload,
            teardownWorkload,
            setIterations,
            setThreadCount,
            loadWorkloadContext,
        }
    };
})();

export const runWorkloadsSerially = runner.serial;
export const runWorkloadsInParallel = runner.parallel;
export const runCompositionOfWorkloads = runner.composed;
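
// Usage sketch (illustrative only; the workload path below is hypothetical):
//
//   import {runWorkloadsSerially} from "jstests/concurrency/fsm_libs/runner.js";
//
//   await runWorkloadsSerially(
//       ["jstests/concurrency/fsm_workloads/my_workload.js"],  // hypothetical workload
//       {} /* clusterOptions */,
//       {iterationMultiplier: 2} /* executionOptions */,
//       {validateCollections: false} /* cleanupOptions */);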