mirror of https://github.com/mongodb/mongo
SERVER-104256 Prioritize starting long-running tests first (#38980)
GitOrigin-RevId: 1b3860d41f7ce32a342b1b9015a77a689d191bd2
This commit is contained in:
parent
7cabe18f42
commit
788088de45
|
|
@ -598,7 +598,7 @@ SHELL_SEED = None
|
||||||
|
|
||||||
# If true, then the order the tests run in is randomized. Otherwise the tests will run in
|
# The ShuffleStrategy used to order the tests. If unset (None), the tests will run in
|
||||||
# alphabetical (case-insensitive) order.
|
# alphabetical (case-insensitive) order.
|
||||||
SHUFFLE = None
|
SHUFFLE_STRATEGY = None
|
||||||
|
|
||||||
# If true, the launching of jobs is staggered in resmoke.py.
|
# If true, the launching of jobs is staggered in resmoke.py.
|
||||||
STAGGER_JOBS = None
|
STAGGER_JOBS = None
|
||||||
|
|
|
||||||
|
|
@ -28,9 +28,12 @@ from opentelemetry.trace import NonRecordingSpan, SpanContext, TraceFlags
|
||||||
from buildscripts.idl import gen_all_feature_flag_list
|
from buildscripts.idl import gen_all_feature_flag_list
|
||||||
from buildscripts.resmokelib import config as _config
|
from buildscripts.resmokelib import config as _config
|
||||||
from buildscripts.resmokelib import mongo_fuzzer_configs, multiversionsetupconstants, utils
|
from buildscripts.resmokelib import mongo_fuzzer_configs, multiversionsetupconstants, utils
|
||||||
|
from buildscripts.resmokelib.run import TestRunner
|
||||||
from buildscripts.resmokelib.utils.batched_baggage_span_processor import BatchedBaggageSpanProcessor
|
from buildscripts.resmokelib.utils.batched_baggage_span_processor import BatchedBaggageSpanProcessor
|
||||||
from buildscripts.resmokelib.utils.file_span_exporter import FileSpanExporter
|
from buildscripts.resmokelib.utils.file_span_exporter import FileSpanExporter
|
||||||
from buildscripts.util.read_config import read_config_file
|
from buildscripts.util.read_config import read_config_file
|
||||||
|
from buildscripts.util.taskname import determine_task_base_name
|
||||||
|
from buildscripts.util.teststats import HistoricTaskData
|
||||||
from evergreen.config import get_auth
|
from evergreen.config import get_auth
|
||||||
|
|
||||||
BASE_16_TO_INT = 16
|
BASE_16_TO_INT = 16
|
||||||
|
|
@ -876,13 +879,21 @@ flags in common: {common_set}
|
||||||
_config.LOGGER_DIR = os.path.join(_config.CONFIG_DIR, "loggers")
|
_config.LOGGER_DIR = os.path.join(_config.CONFIG_DIR, "loggers")
|
||||||
|
|
||||||
shuffle = config.pop("shuffle")
|
shuffle = config.pop("shuffle")
|
||||||
if shuffle == "auto":
|
if (
|
||||||
# If the user specified a value for --jobs > 1 (or -j > 1), then default to randomize
|
shuffle == "longest-first"
|
||||||
# the order in which tests are executed. This is because with multiple threads the tests
|
and _config.EVERGREEN_TASK_NAME
|
||||||
# wouldn't run in a deterministic order anyway.
|
and _config.EVERGREEN_VARIANT_NAME
|
||||||
_config.SHUFFLE = _config.JOBS > 1
|
and _config.EVERGREEN_PROJECT_NAME
|
||||||
else:
|
):
|
||||||
_config.SHUFFLE = shuffle == "on"
|
base_task = determine_task_base_name(
|
||||||
|
_config.EVERGREEN_TASK_NAME, _config.EVERGREEN_VARIANT_NAME
|
||||||
|
)
|
||||||
|
historic_task_data = HistoricTaskData.from_s3(
|
||||||
|
_config.EVERGREEN_PROJECT_NAME, base_task, _config.EVERGREEN_VARIANT_NAME
|
||||||
|
)
|
||||||
|
_config.SHUFFLE_STRATEGY = TestRunner.LongestFirstPartialShuffle(historic_task_data)
|
||||||
|
elif shuffle != "off":
|
||||||
|
_config.SHUFFLE_STRATEGY = TestRunner.RandomShuffle()
|
||||||
|
|
||||||
conn_string = config.pop("shell_conn_string")
|
conn_string = config.pop("shell_conn_string")
|
||||||
port = config.pop("shell_port")
|
port = config.pop("shell_port")
|
||||||
|
|
|
||||||
|
|
@ -7,10 +7,12 @@ import os.path
|
||||||
import platform
|
import platform
|
||||||
import random
|
import random
|
||||||
import shlex
|
import shlex
|
||||||
|
import statistics
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
import textwrap
|
import textwrap
|
||||||
import time
|
import time
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
from logging import Logger
|
from logging import Logger
|
||||||
from typing import List, Optional
|
from typing import List, Optional
|
||||||
|
|
||||||
|
|
@ -42,6 +44,7 @@ from buildscripts.resmokelib.suitesconfig import get_suite_files
|
||||||
from buildscripts.resmokelib.testing.docker_cluster_image_builder import build_images
|
from buildscripts.resmokelib.testing.docker_cluster_image_builder import build_images
|
||||||
from buildscripts.resmokelib.testing.suite import Suite
|
from buildscripts.resmokelib.testing.suite import Suite
|
||||||
from buildscripts.resmokelib.utils.dictionary import get_dict_value
|
from buildscripts.resmokelib.utils.dictionary import get_dict_value
|
||||||
|
from buildscripts.util.teststats import HistoricTaskData
|
||||||
|
|
||||||
_INTERNAL_OPTIONS_TITLE = "Internal Options"
|
_INTERNAL_OPTIONS_TITLE = "Internal Options"
|
||||||
_MONGODB_SERVER_OPTIONS_TITLE = "MongoDB Server Options"
|
_MONGODB_SERVER_OPTIONS_TITLE = "MongoDB Server Options"
|
||||||
|
|
@ -806,7 +809,7 @@ class TestRunner(Subcommand):
|
||||||
|
|
||||||
@TRACER.start_as_current_span("run.__init__._execute_suite")
|
@TRACER.start_as_current_span("run.__init__._execute_suite")
|
||||||
def _execute_suite(self, suite: Suite) -> bool:
|
def _execute_suite(self, suite: Suite) -> bool:
|
||||||
"""Execute Fa suite and return True if interrupted, False otherwise."""
|
"""Execute a suite and return True if interrupted, False otherwise."""
|
||||||
execute_suite_span = trace.get_current_span()
|
execute_suite_span = trace.get_current_span()
|
||||||
execute_suite_span.set_attributes(attributes=suite.get_suite_otel_attributes())
|
execute_suite_span.set_attributes(attributes=suite.get_suite_otel_attributes())
|
||||||
self._shuffle_tests(suite)
|
self._shuffle_tests(suite)
|
||||||
|
|
@ -903,10 +906,93 @@ class TestRunner(Subcommand):
|
||||||
)
|
)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
class ShuffleStrategy(ABC):
    """Interface for objects that decide the order in which a suite's tests are started."""

    @abstractmethod
    def shuffle(self, tests):
        """Return `tests` in the order they should be started."""


class RandomShuffle(ShuffleStrategy):
    """A completely random shuffle."""

    def shuffle(self, tests):
        """Shuffle `tests` in place using the `random` module and return the same list."""
        random.shuffle(tests)
        return tests


class LongestFirstPartialShuffle(ShuffleStrategy):
    """
    A partial shuffle that prioritizes starting longer running tests earlier.

    For an illustration of typical shuffling results, see the test for this
    in buildscripts/tests/resmokelib/run/test_shuffle_tests.py
    """

    def __init__(self, historic_task_data: "HistoricTaskData"):
        """Record the average duration of every test found in `historic_task_data`."""
        # Maps test name -> historic average duration. A later entry for the
        # same test name overrides an earlier one.
        self.runtimes_historic = {
            result.test_name: result.avg_duration
            for result in historic_task_data.historic_test_results
        }

    def shuffle(self, tests):
        """
        Performs a weighted_shuffle, where tests with a higher weight are more likely to be started earlier.

        The weight is determined by how many standard deviations above the mean runtime a particular test is.
        All tests below the mean or without historic data are equal weighted.

        Falls back to a completely random (in place) shuffle when the statistics
        are unusable: fewer than two tests have historic data, an entry of
        `tests` is not a string, or every known runtime is identical.
        """
        total, mean, stdev = self.compute_stats(tests)
        if not total or not stdev:
            # `not total`: no usable historic runtime information.
            # `not stdev`: all known runtimes are equal, so dividing by the
            # standard deviation below would raise ZeroDivisionError and no
            # test deserves priority anyway.
            random.shuffle(tests)
            return tests

        test_count = len(tests)
        weighted = []
        for test in tests:
            if test in self.runtimes_historic:
                stdevs_above_mean = (self.runtimes_historic[test] - mean) / stdev
                # max(_, 1) ensures positive, non-zero weight.
                weight = max(stdevs_above_mean * test_count, 1)
            else:
                weight = 1
            weighted.append((test, weight))
        return self.weighted_shuffle(weighted)

    def compute_stats(self, tests):
        """
        Return (total, mean, stdev) of the historic runtimes of `tests`.

        Returns (None, None, None) when an entry of `tests` is not a string or
        when fewer than two tests have historic data.
        """
        runtimes = []
        for test in tests:
            if not isinstance(test, str):
                # `test` is itself many tests, in parallel_fsm_workload_test suites
                return None, None, None
            if test in self.runtimes_historic:
                runtimes.append(self.runtimes_historic[test])

        if len(runtimes) < 2:
            # There are not enough tests with historic data to compute stdev
            return None, None, None

        return sum(runtimes), statistics.mean(runtimes), statistics.stdev(runtimes)

    def weighted_shuffle(self, arr):
        """
        Shuffle an array of tuples (element, weight). Weights should be positive, non-zero.

        `arr` is reordered in place; the returned list holds only the elements.
        """
        for position in range(len(arr)):
            # Pick one of the not-yet-placed entries and move it to `position`.
            offset = self.weighted_index_choice(arr[position:])
            arr[position], arr[position + offset] = arr[position + offset], arr[position]
        return [test for test, _ in arr]

    def weighted_index_choice(self, arr):
        """
        Return an index into `arr`, chosen with probability proportional to its weight.

        `arr` is a non-empty sequence of (element, weight) tuples; an empty
        sequence raises IndexError.
        """
        if not arr:
            raise IndexError("cannot choose from an empty sequence")

        total_weight = sum(weight for _, weight in arr)
        choice = random.random() * total_weight
        cumulative = 0
        for index, (_, weight) in enumerate(arr):
            cumulative += weight
            if choice <= cumulative:
                return index
        # Floating-point guard: sum() and the running total may round
        # differently, so `choice` can land marginally above the final running
        # total. Select the last entry instead of indexing past the end.
        return len(arr) - 1
|
||||||
|
|
||||||
def _shuffle_tests(self, suite: Suite):
|
def _shuffle_tests(self, suite: Suite):
|
||||||
"""Shuffle the tests if the shuffle cli option was set."""
|
"""Shuffle the tests if the shuffle cli option was set."""
|
||||||
random.seed(config.RANDOM_SEED)
|
random.seed(config.RANDOM_SEED)
|
||||||
if not config.SHUFFLE:
|
if not config.SHUFFLE_STRATEGY:
|
||||||
return
|
return
|
||||||
self._exec_logger.info(
|
self._exec_logger.info(
|
||||||
"Shuffling order of tests for %ss in suite %s. The seed is %d.",
|
"Shuffling order of tests for %ss in suite %s. The seed is %d.",
|
||||||
|
|
@ -914,7 +1000,7 @@ class TestRunner(Subcommand):
|
||||||
suite.get_display_name(),
|
suite.get_display_name(),
|
||||||
config.RANDOM_SEED,
|
config.RANDOM_SEED,
|
||||||
)
|
)
|
||||||
random.shuffle(suite.tests)
|
suite.tests = config.SHUFFLE_STRATEGY.shuffle(suite.tests)
|
||||||
|
|
||||||
def _get_suites(self) -> List[Suite]:
|
def _get_suites(self) -> List[Suite]:
|
||||||
"""Return the list of suites for this resmoke invocation."""
|
"""Return the list of suites for this resmoke invocation."""
|
||||||
|
|
@ -1550,11 +1636,11 @@ class RunPlugin(PluginInterface):
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--shuffle",
|
"--shuffle",
|
||||||
action="store_const",
|
action="store_const",
|
||||||
const="on",
|
const="random",
|
||||||
dest="shuffle",
|
dest="shuffle",
|
||||||
help=(
|
help=(
|
||||||
"Randomizes the order in which tests are executed. This is equivalent"
|
"Randomizes the order in which tests are executed. This is equivalent"
|
||||||
" to specifying --shuffleMode=on."
|
" to specifying --shuffleMode=random."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -1562,12 +1648,12 @@ class RunPlugin(PluginInterface):
|
||||||
"--shuffleMode",
|
"--shuffleMode",
|
||||||
action="store",
|
action="store",
|
||||||
dest="shuffle",
|
dest="shuffle",
|
||||||
choices=("on", "off", "auto"),
|
choices=("random", "longest-first", "off"),
|
||||||
metavar="ON|OFF|AUTO",
|
metavar="random|longest-first|off",
|
||||||
help=(
|
help=(
|
||||||
"Controls whether to randomize the order in which tests are executed."
|
"Controls whether to randomize the order in which tests are executed."
|
||||||
" Defaults to auto when not supplied. auto enables randomization in"
|
" The longest-first option requires historic runtime information via the evergreen"
|
||||||
" all cases except when the number of jobs requested is 1."
|
" project/variant/task name, otherwise fallsback to completely random."
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -110,6 +110,10 @@ class Suite(object):
|
||||||
self._tests, self._excluded = self._get_tests_for_kind(self.test_kind)
|
self._tests, self._excluded = self._get_tests_for_kind(self.test_kind)
|
||||||
return self._tests
|
return self._tests
|
||||||
|
|
||||||
|
@tests.setter
def tests(self, tests):
    """Set the tests, replacing the list returned by the `tests` property."""
    self._tests = tests
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def excluded(self):
|
def excluded(self):
|
||||||
"""Get the excluded."""
|
"""Get the excluded."""
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,81 @@
|
||||||
|
import random
|
||||||
|
import unittest
|
||||||
|
from collections import namedtuple
|
||||||
|
|
||||||
|
from buildscripts.resmokelib.run import TestRunner
|
||||||
|
from buildscripts.util.teststats import HistoricalTestInformation, HistoricTaskData
|
||||||
|
|
||||||
|
|
||||||
|
class TestShuffle(unittest.TestCase):
    """Seeded checks of the shuffle strategies nested in TestRunner."""

    @staticmethod
    def _single_pass_stat(name, duration):
        """Build the stats of a test that passed once and never failed, taking `duration`."""
        return HistoricalTestInformation(
            test_name=name,
            num_pass=1,
            num_fail=0,
            avg_duration_pass=duration,
            max_duration_pass=duration,
        )

    def test_random_shuffle(self):
        """RandomShuffle yields the seeded permutation of the input."""
        random.seed(0)
        names = ["a", "b", "c", "d"]
        want = ["c", "a", "b", "d"]
        got = TestRunner.RandomShuffle().shuffle(names)
        self.assertListEqual(got, want)

    def test_slowest_first_partial_shuffle(self):
        """The one slow test ('a') is started early; the equal-runtime rest is random."""
        names = ["a", "b", "c", "d"]
        durations = (("a", 1000), ("b", 1), ("c", 1), ("d", 1))
        history = HistoricTaskData.from_stats_list(
            [self._single_pass_stat(name, duration) for name, duration in durations]
        )

        Scenario = namedtuple("Scenario", ["seed", "expected"])
        # The weighted shuffle is effective as long as 'a' is prioritized to be earlier,
        # while all other equal runtime tests are completely random.
        scenarios = [
            Scenario(0, ["c", "a", "b", "d"]),
            Scenario(1, ["a", "d", "b", "c"]),
            Scenario(2, ["d", "a", "c", "b"]),
            Scenario(3, ["a", "c", "b", "d"]),
            Scenario(4, ["a", "b", "c", "d"]),
            Scenario(5, ["a", "d", "b", "c"]),
            Scenario(6, ["c", "a", "b", "d"]),
            Scenario(7, ["a", "b", "d", "c"]),
            Scenario(8, ["a", "d", "c", "b"]),
            Scenario(9, ["a", "c", "b", "d"]),
        ]

        for seed, want in scenarios:
            random.seed(seed)
            got = TestRunner.LongestFirstPartialShuffle(history).shuffle(names)
            self.assertListEqual(got, want, f"Testcase with seed {seed} failed.")

    def test_slowest_first_partial_shuffle_empty(self):
        """Without any historic data the strategy yields the plain seeded random order."""
        random.seed(0)
        history = HistoricTaskData.from_stats_list([])
        names = ["a", "b", "c", "d"]
        want = ["c", "a", "b", "d"]
        got = TestRunner.LongestFirstPartialShuffle(history).shuffle(names)
        self.assertListEqual(got, want)
|
||||||
|
|
@ -79,7 +79,7 @@ if [[ ${disable_unit_tests} = "false" && ! -f ${skip_tests} ]]; then
|
||||||
extra_args="$extra_args --jobs=${resmoke_jobs}"
|
extra_args="$extra_args --jobs=${resmoke_jobs}"
|
||||||
|
|
||||||
if [ ${should_shuffle} = true ]; then
|
if [ ${should_shuffle} = true ]; then
|
||||||
extra_args="$extra_args --shuffle"
|
extra_args="$extra_args --shuffleMode=longest-first"
|
||||||
elif [ ${should_shuffle} = false ]; then
|
elif [ ${should_shuffle} = false ]; then
|
||||||
extra_args="$extra_args --shuffleMode=off"
|
extra_args="$extra_args --shuffleMode=off"
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue