SERVER-104256 Prioritize starting long-running tests first (#38980)

GitOrigin-RevId: 1b3860d41f7ce32a342b1b9015a77a689d191bd2
2025-07-24 16:04:08 -04:00 · 2025-07-24 16:04:08 -04:00 · 788088de45
parent 7cabe18f42
commit 788088de45
6 changed files with 200 additions and 18 deletions
--- a/buildscripts/resmokelib/config.py
+++ b/buildscripts/resmokelib/config.py
@ -598,7 +598,7 @@ SHELL_SEED = None

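-# If true, then the order the tests run in is randomized. Otherwise the tests will run in
-# alphabetical (case-insensitive) order.
-SHUFFLE = None
+# The strategy object used to shuffle the order the tests run in. If None, the tests will run in
+# alphabetical (case-insensitive) order.
+SHUFFLE_STRATEGY = None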

 # If true, the launching of jobs is staggered in resmoke.py.
 STAGGER_JOBS = None
--- a/buildscripts/resmokelib/configure_resmoke.py
+++ b/buildscripts/resmokelib/configure_resmoke.py
@ -28,9 +28,12 @@ from opentelemetry.trace import NonRecordingSpan, SpanContext, TraceFlags
 from buildscripts.idl import gen_all_feature_flag_list
 from buildscripts.resmokelib import config as _config
 from buildscripts.resmokelib import mongo_fuzzer_configs, multiversionsetupconstants, utils
+from buildscripts.resmokelib.run import TestRunner
 from buildscripts.resmokelib.utils.batched_baggage_span_processor import BatchedBaggageSpanProcessor
 from buildscripts.resmokelib.utils.file_span_exporter import FileSpanExporter
 from buildscripts.util.read_config import read_config_file
+from buildscripts.util.taskname import determine_task_base_name
+from buildscripts.util.teststats import HistoricTaskData
 from evergreen.config import get_auth

 BASE_16_TO_INT = 16
@ -876,13 +879,21 @@ flags in common: {common_set}
    _config.LOGGER_DIR = os.path.join(_config.CONFIG_DIR, "loggers")

    shuffle = config.pop("shuffle")
-    if shuffle == "auto":
-        # If the user specified a value for --jobs > 1 (or -j > 1), then default to randomize
-        # the order in which tests are executed. This is because with multiple threads the tests
-        # wouldn't run in a deterministic order anyway.
-        _config.SHUFFLE = _config.JOBS > 1
-    else:
-        _config.SHUFFLE = shuffle == "on"
+    if (
+        shuffle == "longest-first"
+        and _config.EVERGREEN_TASK_NAME
+        and _config.EVERGREEN_VARIANT_NAME
+        and _config.EVERGREEN_PROJECT_NAME
+    ):
+        base_task = determine_task_base_name(
+            _config.EVERGREEN_TASK_NAME, _config.EVERGREEN_VARIANT_NAME
+        )
+        historic_task_data = HistoricTaskData.from_s3(
+            _config.EVERGREEN_PROJECT_NAME, base_task, _config.EVERGREEN_VARIANT_NAME
+        )
+        _config.SHUFFLE_STRATEGY = TestRunner.LongestFirstPartialShuffle(historic_task_data)
+    elif shuffle != "off":
+        _config.SHUFFLE_STRATEGY = TestRunner.RandomShuffle()

    conn_string = config.pop("shell_conn_string")
    port = config.pop("shell_port")
--- a/buildscripts/resmokelib/run/init.py
+++ b/buildscripts/resmokelib/run/init.py
@ -7,10 +7,12 @@ import os.path
 import platform
 import random
 import shlex
+import statistics
 import subprocess
 import sys
 import textwrap
 import time
+from abc import ABC, abstractmethod
 from logging import Logger
 from typing import List, Optional

@ -42,6 +44,7 @@ from buildscripts.resmokelib.suitesconfig import get_suite_files
 from buildscripts.resmokelib.testing.docker_cluster_image_builder import build_images
 from buildscripts.resmokelib.testing.suite import Suite
 from buildscripts.resmokelib.utils.dictionary import get_dict_value
+from buildscripts.util.teststats import HistoricTaskData

 _INTERNAL_OPTIONS_TITLE = "Internal Options"
 _MONGODB_SERVER_OPTIONS_TITLE = "MongoDB Server Options"
@ -806,7 +809,7 @@ class TestRunner(Subcommand):

    @TRACER.start_as_current_span("run.__init__._execute_suite")
    def _execute_suite(self, suite: Suite) -> bool:
-        """Execute Fa suite and return True if interrupted, False otherwise."""
+        """Execute a suite and return True if interrupted, False otherwise."""
        execute_suite_span = trace.get_current_span()
        execute_suite_span.set_attributes(attributes=suite.get_suite_otel_attributes())
        self._shuffle_tests(suite)
@ -903,10 +906,93 @@ class TestRunner(Subcommand):
        )
        return False

+    class ShuffleStrategy(ABC):
+        """Interface for the strategies that decide the order in which tests are run."""
+
+        @abstractmethod
+        def shuffle(self, tests):
+            """Return `tests` in the order in which they should be run."""
+
+    class RandomShuffle(ShuffleStrategy):
+        """Strategy that puts the tests in a completely random order."""
+
+        def shuffle(self, tests):
+            """Shuffle `tests` in place with the `random` module and return that same list."""
+            random.shuffle(tests)
+            return tests
+
+    class LongestFirstPartialShuffle(ShuffleStrategy):
+        """
+        A partial shuffle that prioritizes starting longer running tests earlier.
+
+        For an illustration of typical shuffling results, see the test for this
+        in buildscripts/tests/resmokelib/run/test_shuffle_tests.py
+        """
+
+        def __init__(self, historic_task_data: HistoricTaskData):
+            """Index the historic average duration of every test by its test name."""
+            self.runtimes_historic = {
+                result.test_name: result.avg_duration
+                for result in historic_task_data.historic_test_results
+            }
+
+        def shuffle(self, tests):
+            """
+            Perform a weighted shuffle, where higher-weighted tests are more likely to start earlier.
+
+            The weight is determined by how many standard deviations above the mean runtime a
+            particular test is. All tests below the mean or without historic data are equally
+            weighted. Falls back to a completely random shuffle when compute_stats() cannot
+            provide statistics or when every known runtime is identical.
+            """
+            total, mean, stdev = self.compute_stats(tests)
+            if not total or not stdev:
+                # Either there is no usable historic runtime information, or every known runtime
+                # is identical (stdev == 0), which would make the division below raise
+                # ZeroDivisionError. In both cases there is nothing to prioritize.
+                return TestRunner.RandomShuffle().shuffle(tests)
+            arr = []
+            for test in tests:
+                if test in self.runtimes_historic:
+                    stdevs_above_mean = (self.runtimes_historic[test] - mean) / stdev
+                    weight = max(
+                        stdevs_above_mean * len(tests), 1
+                    )  # max(_, 1) ensures positive, non-zero weight.
+                else:
+                    weight = 1
+                arr.append((test, weight))
+            return self.weighted_shuffle(arr)
+
+        def compute_stats(self, tests):
+            """
+            Return the (total, mean, stdev) of the historic runtimes known for `tests`.
+
+            Returns (None, None, None) when any entry of `tests` is not a string, or when fewer
+            than two of the tests have historic data.
+            """
+            total = 0
+            runtimes = []
+            for test in tests:
+                if not isinstance(test, str):
+                    # `test` is itself many tests, in parallel_fsm_workload_test suites
+                    return None, None, None
+                if test in self.runtimes_historic:
+                    total += self.runtimes_historic[test]
+                    runtimes.append(self.runtimes_historic[test])
+            if len(runtimes) < 2:
+                # There are not enough tests with historic data to compute stdev
+                return None, None, None
+            mean = statistics.mean(runtimes)
+            stdev = statistics.stdev(runtimes)
+            return total, mean, stdev
+
+        def weighted_shuffle(self, arr):
+            """
+            Shuffle an array of tuples (element, weight). Weights should be positive, non-zero.
+
+            Each position is filled, front to back, with a weighted pick from the entries that
+            have not been placed yet. Reorders `arr` in place and returns a new list holding
+            only the elements.
+            """
+            for i, _ in enumerate(arr):
+                v = self.weighted_index_choice(arr[i:])
+                arr[i + v], arr[i] = arr[i], arr[i + v]
+            return [test for test, _ in arr]
+
+        def weighted_index_choice(self, arr):
+            """Return an index into `arr`, picked with probability proportional to its weight."""
+            total_weight = sum(weight for _, weight in arr)
+            choice = random.random() * total_weight
+            i = 0
+            cur = 0
+            # Walk the cumulative weights until they reach the randomly chosen point.
+            while True:
+                weight = arr[i][1]
+                cur += weight
+                if choice <= cur:
+                    return i
+                i += 1
+
    def _shuffle_tests(self, suite: Suite):
        """Shuffle the tests if the shuffle cli option was set."""
        random.seed(config.RANDOM_SEED)
-        if not config.SHUFFLE:
+        if not config.SHUFFLE_STRATEGY:
            return
        self._exec_logger.info(
            "Shuffling order of tests for %ss in suite %s. The seed is %d.",
@ -914,7 +1000,7 @@ class TestRunner(Subcommand):
            suite.get_display_name(),
            config.RANDOM_SEED,
        )
-        random.shuffle(suite.tests)
+        suite.tests = config.SHUFFLE_STRATEGY.shuffle(suite.tests)

    def _get_suites(self) -> List[Suite]:
        """Return the list of suites for this resmoke invocation."""
@ -1550,11 +1636,11 @@ class RunPlugin(PluginInterface):
        parser.add_argument(
            "--shuffle",
            action="store_const",
-            const="on",
+            const="random",
            dest="shuffle",
            help=(
                "Randomizes the order in which tests are executed. This is equivalent"
-                " to specifying --shuffleMode=on."
+                " to specifying --shuffleMode=random."
            ),
        )

@ -1562,12 +1648,12 @@ class RunPlugin(PluginInterface):
            "--shuffleMode",
            action="store",
            dest="shuffle",
-            choices=("on", "off", "auto"),
-            metavar="ON|OFF|AUTO",
+            choices=("random", "longest-first", "off"),
+            metavar="random|longest-first|off",
            help=(
                "Controls whether to randomize the order in which tests are executed."
-                " Defaults to auto when not supplied. auto enables randomization in"
-                " all cases except when the number of jobs requested is 1."
+                " The longest-first option requires historic runtime information via the evergreen"
+                " project/variant/task name, otherwise fallsback to completely random."
            ),
        )

--- a/buildscripts/resmokelib/testing/suite.py
+++ b/buildscripts/resmokelib/testing/suite.py
@ -110,6 +110,10 @@ class Suite(object):
            self._tests, self._excluded = self._get_tests_for_kind(self.test_kind)
        return self._tests

+    @tests.setter
+    def tests(self, tests):
+        """Set the tests."""
+        self._tests = tests
+
    @property
    def excluded(self):
        """Get the excluded."""
--- a/buildscripts/tests/resmokelib/run/test_shuffle_tests.py
+++ b/buildscripts/tests/resmokelib/run/test_shuffle_tests.py
@ -0,0 +1,81 @@
+import random
+import unittest
+from collections import namedtuple
+
+from buildscripts.resmokelib.run import TestRunner
+from buildscripts.util.teststats import HistoricalTestInformation, HistoricTaskData
+
+
+class TestShuffle(unittest.TestCase):
+    """Tests for the shuffle strategies nested in TestRunner."""
+
+    def test_random_shuffle(self):
+        """RandomShuffle yields the expected order for a fixed seed."""
+        random.seed(0)
+        shuffled = TestRunner.RandomShuffle().shuffle(["a", "b", "c", "d"])
+        self.assertListEqual(shuffled, ["c", "a", "b", "d"])
+
+    def test_slowest_first_partial_shuffle(self):
+        """LongestFirstPartialShuffle yields the expected order for each of several seeds."""
+        # 'a' is by far the slowest test; every other test has the same short runtime.
+        durations = {"a": 1000, "b": 1, "c": 1, "d": 1}
+        tests = list(durations)
+        history = HistoricTaskData.from_stats_list(
+            [
+                HistoricalTestInformation(
+                    test_name=name,
+                    num_pass=1,
+                    num_fail=0,
+                    avg_duration_pass=duration,
+                    max_duration_pass=duration,
+                )
+                for name, duration in durations.items()
+            ]
+        )
+
+        # The weighted shuffle is effective as long as 'a' is prioritized to be earlier,
+        # while all other equal runtime tests are completely random.
+        Case = namedtuple("TestCase", ["seed", "expected"])
+        cases = [
+            Case(0, ["c", "a", "b", "d"]),
+            Case(1, ["a", "d", "b", "c"]),
+            Case(2, ["d", "a", "c", "b"]),
+            Case(3, ["a", "c", "b", "d"]),
+            Case(4, ["a", "b", "c", "d"]),
+            Case(5, ["a", "d", "b", "c"]),
+            Case(6, ["c", "a", "b", "d"]),
+            Case(7, ["a", "b", "d", "c"]),
+            Case(8, ["a", "d", "c", "b"]),
+            Case(9, ["a", "c", "b", "d"]),
+        ]
+
+        for case in cases:
+            random.seed(case.seed)
+            strategy = TestRunner.LongestFirstPartialShuffle(history)
+            self.assertListEqual(
+                strategy.shuffle(tests), case.expected, f"Testcase with seed {case.seed} failed."
+            )
+
+    def test_slowest_first_partial_shuffle_empty(self):
+        """Without historic data the result matches the plain random shuffle for the same seed."""
+        random.seed(0)
+        strategy = TestRunner.LongestFirstPartialShuffle(HistoricTaskData.from_stats_list([]))
+        shuffled = strategy.shuffle(["a", "b", "c", "d"])
+        self.assertListEqual(shuffled, ["c", "a", "b", "d"])
--- a/evergreen/resmoke_tests_execute.sh
+++ b/evergreen/resmoke_tests_execute.sh
@ -79,7 +79,7 @@ if [[ ${disable_unit_tests} = "false" && ! -f ${skip_tests} ]]; then
    extra_args="$extra_args --jobs=${resmoke_jobs}"

    if [ ${should_shuffle} = true ]; then
-        extra_args="$extra_args --shuffle"
+        extra_args="$extra_args --shuffleMode=longest-first"
    elif [ ${should_shuffle} = false ]; then
        extra_args="$extra_args --shuffleMode=off"
    fi