SERVER-104256 Prioritize starting long-running tests first (#38980)

GitOrigin-RevId: 1b3860d41f7ce32a342b1b9015a77a689d191bd2
This commit is contained in:
Sean Lyons 2025-07-24 16:04:08 -04:00 committed by MongoDB Bot
parent 7cabe18f42
commit 788088de45
6 changed files with 200 additions and 18 deletions

View File

@ -598,7 +598,7 @@ SHELL_SEED = None
# SHUFFLE_STRATEGY holds the strategy object used to shuffle the order the tests run in. If it is
# unset (None), the tests are not shuffled and run in alphabetical (case-insensitive) order.
SHUFFLE = None
SHUFFLE_STRATEGY = None
# If true, the launching of jobs is staggered in resmoke.py.
STAGGER_JOBS = None

View File

@ -28,9 +28,12 @@ from opentelemetry.trace import NonRecordingSpan, SpanContext, TraceFlags
from buildscripts.idl import gen_all_feature_flag_list
from buildscripts.resmokelib import config as _config
from buildscripts.resmokelib import mongo_fuzzer_configs, multiversionsetupconstants, utils
from buildscripts.resmokelib.run import TestRunner
from buildscripts.resmokelib.utils.batched_baggage_span_processor import BatchedBaggageSpanProcessor
from buildscripts.resmokelib.utils.file_span_exporter import FileSpanExporter
from buildscripts.util.read_config import read_config_file
from buildscripts.util.taskname import determine_task_base_name
from buildscripts.util.teststats import HistoricTaskData
from evergreen.config import get_auth
BASE_16_TO_INT = 16
@ -876,13 +879,21 @@ flags in common: {common_set}
_config.LOGGER_DIR = os.path.join(_config.CONFIG_DIR, "loggers")
shuffle = config.pop("shuffle")
if shuffle == "auto":
# If the user specified a value for --jobs > 1 (or -j > 1), then default to randomize
# the order in which tests are executed. This is because with multiple threads the tests
# wouldn't run in a deterministic order anyway.
_config.SHUFFLE = _config.JOBS > 1
else:
_config.SHUFFLE = shuffle == "on"
if (
shuffle == "longest-first"
and _config.EVERGREEN_TASK_NAME
and _config.EVERGREEN_VARIANT_NAME
and _config.EVERGREEN_PROJECT_NAME
):
base_task = determine_task_base_name(
_config.EVERGREEN_TASK_NAME, _config.EVERGREEN_VARIANT_NAME
)
historic_task_data = HistoricTaskData.from_s3(
_config.EVERGREEN_PROJECT_NAME, base_task, _config.EVERGREEN_VARIANT_NAME
)
_config.SHUFFLE_STRATEGY = TestRunner.LongestFirstPartialShuffle(historic_task_data)
elif shuffle != "off":
_config.SHUFFLE_STRATEGY = TestRunner.RandomShuffle()
conn_string = config.pop("shell_conn_string")
port = config.pop("shell_port")

View File

@ -7,10 +7,12 @@ import os.path
import platform
import random
import shlex
import statistics
import subprocess
import sys
import textwrap
import time
from abc import ABC, abstractmethod
from logging import Logger
from typing import List, Optional
@ -42,6 +44,7 @@ from buildscripts.resmokelib.suitesconfig import get_suite_files
from buildscripts.resmokelib.testing.docker_cluster_image_builder import build_images
from buildscripts.resmokelib.testing.suite import Suite
from buildscripts.resmokelib.utils.dictionary import get_dict_value
from buildscripts.util.teststats import HistoricTaskData
_INTERNAL_OPTIONS_TITLE = "Internal Options"
_MONGODB_SERVER_OPTIONS_TITLE = "MongoDB Server Options"
@ -806,7 +809,7 @@ class TestRunner(Subcommand):
@TRACER.start_as_current_span("run.__init__._execute_suite")
def _execute_suite(self, suite: Suite) -> bool:
"""Execute Fa suite and return True if interrupted, False otherwise."""
"""Execute a suite and return True if interrupted, False otherwise."""
execute_suite_span = trace.get_current_span()
execute_suite_span.set_attributes(attributes=suite.get_suite_otel_attributes())
self._shuffle_tests(suite)
@ -903,10 +906,93 @@ class TestRunner(Subcommand):
)
return False
class ShuffleStrategy(ABC):
    """Interface for objects that decide the order in which a list of tests is started."""

    @abstractmethod
    def shuffle(self, tests):
        """Return `tests` in the order chosen by this strategy.

        Concrete strategies must override this method; the base implementation
        does nothing and returns None.
        """
class RandomShuffle(ShuffleStrategy):
    """A completely random shuffle."""

    def shuffle(self, tests):
        """Shuffle `tests` in place with the module-level `random` generator and return it.

        The order therefore depends on the current state (seed) of the global
        `random` module. The returned object is the same list that was passed in.
        """
        random.shuffle(tests)
        # Return the mutated list so callers can use the result of `shuffle` directly.
        return tests
class LongestFirstPartialShuffle(ShuffleStrategy):
    """
    A partial shuffle that prioritizes starting longer running tests earlier.

    For an illustration of typical shuffling results, see the test for this
    in buildscripts/tests/resmokelib/run/test_shuffle_tests.py
    """

    def __init__(self, historic_task_data: HistoricTaskData):
        """Index the historic average duration of every known test by its test name."""
        # If a test name appears more than once, the last result wins.
        self.runtimes_historic = {
            result.test_name: result.avg_duration
            for result in historic_task_data.historic_test_results
        }

    def shuffle(self, tests):
        """
        Performs a weighted_shuffle, where tests with a higher weight are more likely to be started earlier.

        The weight is determined by how many standard deviations above the mean runtime a particular test is.
        All tests below the mean or without historic data are equal weighted.

        Falls back to a completely random (in-place) shuffle when the historic data
        carries no usable signal: fewer than two tests have historic runtimes, the
        entries of `tests` are not plain test names, or every known runtime is identical.
        Otherwise a new list is returned and `tests` itself is left untouched.
        """
        total, mean, stdev = self.compute_stats(tests)
        if not total or not stdev:
            # `not total`: no usable historic runtime information.
            # `not stdev`: every known runtime is identical, so there is nothing to
            # prioritize and dividing by the (zero) stdev below would raise.
            return TestRunner.RandomShuffle().shuffle(tests)

        arr = []
        for test in tests:
            if test in self.runtimes_historic:
                stdevs_above_mean = (self.runtimes_historic[test] - mean) / stdev
                weight = max(
                    stdevs_above_mean * len(tests), 1
                )  # max(_, 1) ensures positive, non-zero weight.
            else:
                weight = 1
            arr.append((test, weight))
        return self.weighted_shuffle(arr)

    def compute_stats(self, tests):
        """
        Return (total, mean, stdev) of the historic runtimes of the given tests.

        Only tests present in the historic data contribute. Returns (None, None, None)
        when any entry of `tests` is not a string or when fewer than two of the tests
        have historic data.
        """
        total = 0
        runtimes = []
        for test in tests:
            if not isinstance(test, str):
                # `test` is itself many tests, in parallel_fsm_workload_test suites
                return None, None, None
            if test in self.runtimes_historic:
                total += self.runtimes_historic[test]
                runtimes.append(self.runtimes_historic[test])

        if len(runtimes) < 2:
            # There are not enough tests with historic data to compute stdev
            return None, None, None

        mean = statistics.mean(runtimes)
        stdev = statistics.stdev(runtimes)
        return total, mean, stdev

    def weighted_shuffle(self, arr):
        """Shuffle an array of tuples (element, weight). Weights should be positive, non-zero.

        `arr` is reordered in place; the return value is a new list holding only the
        elements (without their weights) in the shuffled order.
        """
        for i, _ in enumerate(arr):
            # Pick the element for position i from the not-yet-placed tail arr[i:].
            v = self.weighted_index_choice(arr[i:])
            arr[i + v], arr[i] = arr[i], arr[i + v]
        return [test for test, _ in arr]

    def weighted_index_choice(self, arr):
        """Return an index into `arr` chosen with probability proportional to its weight.

        `arr` is a non-empty sequence of (element, weight) tuples. Raises IndexError
        when `arr` is empty.
        """
        if not arr:
            raise IndexError("cannot choose from an empty sequence")

        total_weight = sum(weight for _, weight in arr)
        choice = random.random() * total_weight

        cur = 0
        for i, (_, weight) in enumerate(arr):
            cur += weight
            if choice <= cur:
                return i
        # The running sum above and `sum()` may round differently for float weights,
        # so `choice` can end up marginally above the final cumulative weight. Select
        # the last element in that case instead of indexing past the end of `arr`.
        return len(arr) - 1
def _shuffle_tests(self, suite: Suite):
"""Shuffle the tests if the shuffle cli option was set."""
random.seed(config.RANDOM_SEED)
if not config.SHUFFLE:
if not config.SHUFFLE_STRATEGY:
return
self._exec_logger.info(
"Shuffling order of tests for %ss in suite %s. The seed is %d.",
@ -914,7 +1000,7 @@ class TestRunner(Subcommand):
suite.get_display_name(),
config.RANDOM_SEED,
)
random.shuffle(suite.tests)
suite.tests = config.SHUFFLE_STRATEGY.shuffle(suite.tests)
def _get_suites(self) -> List[Suite]:
"""Return the list of suites for this resmoke invocation."""
@ -1550,11 +1636,11 @@ class RunPlugin(PluginInterface):
parser.add_argument(
"--shuffle",
action="store_const",
const="on",
const="random",
dest="shuffle",
help=(
"Randomizes the order in which tests are executed. This is equivalent"
" to specifying --shuffleMode=on."
" to specifying --shuffleMode=random."
),
)
@ -1562,12 +1648,12 @@ class RunPlugin(PluginInterface):
"--shuffleMode",
action="store",
dest="shuffle",
choices=("on", "off", "auto"),
metavar="ON|OFF|AUTO",
choices=("random", "longest-first", "off"),
metavar="random|longest-first|off",
help=(
"Controls whether to randomize the order in which tests are executed."
" Defaults to auto when not supplied. auto enables randomization in"
" all cases except when the number of jobs requested is 1."
" The longest-first option requires historic runtime information via the evergreen"
" project/variant/task name, otherwise fallsback to completely random."
),
)

View File

@ -110,6 +110,10 @@ class Suite(object):
self._tests, self._excluded = self._get_tests_for_kind(self.test_kind)
return self._tests
@tests.setter
def tests(self, tests):
    """Replace the stored test list (`_tests`) that the `tests` property returns."""
    self._tests = tests
@property
def excluded(self):
"""Get the excluded."""

View File

@ -0,0 +1,81 @@
import random
import unittest
from collections import namedtuple
from buildscripts.resmokelib.run import TestRunner
from buildscripts.util.teststats import HistoricalTestInformation, HistoricTaskData
class TestShuffle(unittest.TestCase):
    """Unit tests for the shuffle strategies exposed on TestRunner."""

    def test_random_shuffle(self):
        """RandomShuffle produces the order dictated by the seeded global RNG."""
        random.seed(0)
        shuffled = TestRunner.RandomShuffle().shuffle(["a", "b", "c", "d"])
        self.assertListEqual(shuffled, ["c", "a", "b", "d"])

    def test_slowest_first_partial_shuffle(self):
        """The one slow test ('a') is consistently started at or near the front."""
        # (test name, historic duration): 'a' is the only long-running test.
        durations = [("a", 1000), ("b", 1), ("c", 1), ("d", 1)]
        history = HistoricTaskData.from_stats_list(
            [
                HistoricalTestInformation(
                    test_name=name,
                    num_pass=1,
                    num_fail=0,
                    avg_duration_pass=duration,
                    max_duration_pass=duration,
                )
                for name, duration in durations
            ]
        )
        tests = ["a", "b", "c", "d"]

        TestCase = namedtuple("TestCase", ["seed", "expected"])
        # The weighted shuffle is effective as long as 'a' is prioritized to be earlier,
        # while all other equal runtime tests are completely random.
        expected_orders = [
            ["c", "a", "b", "d"],
            ["a", "d", "b", "c"],
            ["d", "a", "c", "b"],
            ["a", "c", "b", "d"],
            ["a", "b", "c", "d"],
            ["a", "d", "b", "c"],
            ["c", "a", "b", "d"],
            ["a", "b", "d", "c"],
            ["a", "d", "c", "b"],
            ["a", "c", "b", "d"],
        ]
        # Seeds are simply 0..9, in the same order as the expected results above.
        testcases = [TestCase(seed, order) for seed, order in enumerate(expected_orders)]

        for testcase in testcases:
            random.seed(testcase.seed)
            strategy = TestRunner.LongestFirstPartialShuffle(history)
            self.assertListEqual(
                strategy.shuffle(tests),
                testcase.expected,
                f"Testcase with seed {testcase.seed} failed.",
            )

    def test_slowest_first_partial_shuffle_empty(self):
        """Without any historic data the strategy behaves like a plain random shuffle."""
        random.seed(0)
        strategy = TestRunner.LongestFirstPartialShuffle(HistoricTaskData.from_stats_list([]))
        self.assertListEqual(
            strategy.shuffle(["a", "b", "c", "d"]),
            ["c", "a", "b", "d"],
        )

View File

@ -79,7 +79,7 @@ if [[ ${disable_unit_tests} = "false" && ! -f ${skip_tests} ]]; then
extra_args="$extra_args --jobs=${resmoke_jobs}"
if [ ${should_shuffle} = true ]; then
extra_args="$extra_args --shuffle"
extra_args="$extra_args --shuffleMode=longest-first"
elif [ ${should_shuffle} = false ]; then
extra_args="$extra_args --shuffleMode=off"
fi