mirror of https://github.com/mongodb/mongo
371 lines
15 KiB
Python
371 lines
15 KiB
Python
"""Tools for splitting suites into parallelizable sub-suites."""
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from datetime import datetime
|
|
from itertools import chain
|
|
from typing import NamedTuple, Callable, Optional, List, Dict, Any
|
|
|
|
import inject
|
|
import requests
|
|
import structlog
|
|
from evergreen import EvergreenApi
|
|
|
|
from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService
|
|
from buildscripts.task_generation.suite_split_strategies import SplitStrategy, FallbackStrategy
|
|
from buildscripts.timeouts.timeout import TimeoutEstimate
|
|
from buildscripts.util import taskname
|
|
from buildscripts.util.teststats import HistoricTaskData, TestRuntime, normalize_test_name
|
|
|
|
# Module-level structured logger shared by everything in this file.
LOGGER = structlog.getLogger(__name__)
|
|
|
|
# Class name of the resmoke hook whose runtime is accounted for at the task level.
CLEAN_EVERY_N_HOOK = "CleanEveryN"

# Banner for generated files; `{file}` is filled in with the name of the generating script.
HEADER_TEMPLATE = """# DO NOT EDIT THIS FILE. All manual edits will be lost.
# This file was generated by {file}.
"""
|
|
|
|
|
|
# pylint: disable=too-many-arguments
|
|
class SubSuite(object):
    """A group of tests that evergreen runs together as one task."""

    def __init__(self, test_list: List[str], task_overhead: float,
                 runtime_list: Optional[List[TestRuntime]] = None) -> None:
        """
        Initialize the object.

        :param test_list: List of tests to include in this sub-suite.
        :param task_overhead: Runtime overhead to expect from task level hooks.
        :param runtime_list: List of historic runtimes for tests in test_list.
        """
        # Only strictly positive historic runtimes count as "known" runtime information.
        known_runtimes = []
        if runtime_list:
            runtime_by_test = {entry.test_name: entry.runtime for entry in runtime_list}
            known_runtimes = [
                runtime_by_test[name] for name in test_list if runtime_by_test.get(name, 0) > 0
            ]

        # Accumulate left to right starting from 0.0 so the float result is reproducible.
        accumulated_runtime = 0.0
        for runtime in known_runtimes:
            accumulated_runtime += runtime

        self.test_list = test_list
        self.tests_with_runtime_info = len(known_runtimes)
        self.max_test_runtime = max(known_runtimes, default=0.0)
        self.expected_runtime = accumulated_runtime + task_overhead

    def should_overwrite_timeout(self) -> bool:
        """
        Determine whether the timeout for this suite should be overwritten.

        The timeout is only overwritten when every test in the sub-suite has runtime info.
        """
        return self.tests_with_runtime_info == len(self)

    def get_timeout_estimate(self) -> TimeoutEstimate:
        """Get the estimated runtime of this task to use for timeouts."""
        if not self.should_overwrite_timeout():
            return TimeoutEstimate.no_timeouts()

        return TimeoutEstimate(max_test_runtime=self.max_test_runtime,
                               expected_task_runtime=self.expected_runtime)

    def __len__(self) -> int:
        """Get the number of tests in this sub-suite."""
        return len(self.test_list)
|
|
|
|
|
|
class GeneratedSuite(NamedTuple):
    """
    Collection of sub-suites generated from a parent suite.

    sub_suites: List of sub-suites comprising whole suite.
    build_variant: Name of build variant suite will run on.
    task_name: Name of task generating suite.
    suite_name: Name of suite.
    """

    sub_suites: List[SubSuite]
    build_variant: str
    task_name: str
    suite_name: str

    def get_test_list(self) -> List[str]:
        """Get every test run by this suite, in sub-suite order."""
        return [test for sub_suite in self.sub_suites for test in sub_suite.test_list]

    # NOTE(review): this makes len() report the number of sub-suites rather than the number of
    # tuple fields; namedtuple helpers that check len() (e.g. `_replace`) may be affected -- confirm.
    def __len__(self) -> int:
        """Get the number of sub-suites."""
        return len(self.sub_suites)

    def sub_suite_config_file(self, index: Optional[int] = None) -> str:
        """
        Get the name of the file to store the resmoke configuration.

        :param index: Index of suite or None for '_misc' suite.
        :return: Name of generated resmoke.py configuration file.
        """
        # The task name (not the suite name) is used because several tasks can share the same
        # resmoke.py suite.
        base_name = taskname.name_generated_task(self.task_name, index, len(self.sub_suites))
        return f"{base_name}.yml"

    def sub_suite_task_name(self, index: Optional[int] = None) -> str:
        """
        Get the name of the task that runs one of the generated sub-suites.

        :param index: Index of suite or None for '_misc' suite.
        :return: Name of generated Evergreen task.
        """
        sub_suite_count = len(self.sub_suites)
        return taskname.name_generated_task(self.task_name, index, sub_suite_count,
                                            self.build_variant)

    def sub_suite_test_list(self, index: int) -> List[str]:
        """Get the list of tests belonging to the sub-suite at the given index."""
        selected = self.sub_suites[index]
        return selected.test_list
|
|
|
|
|
|
class SuiteSplitParameters(NamedTuple):
    """
    Description of a single resmoke suite that should be split.

    build_variant: Build variant the sub-suites are generated for.
    task_name: Name of the task being split.
    suite_name: Name of the resmoke suite being split.
    filename: Filename of the suite configuration.
    is_asan: Whether the build variant being generated on is ASAN.
    test_file_filter: Optional predicate over test names; only tests it accepts are included.
    """

    build_variant: str
    task_name: str
    suite_name: str
    filename: str
    is_asan: bool = False
    test_file_filter: Optional[Callable[[str], bool]] = None
|
|
|
|
|
|
class SuiteSplitConfig(NamedTuple):
    """
    Settings shared by every suite that gets split.

    evg_project: Evergreen project.
    target_resmoke_time: Target runtime for generated sub-suites.
    max_sub_suites: Max number of sub-suites to generate.
    max_tests_per_suite: Max number of tests to put in a single sub-suite.
    start_date: Start date to query for test history.
    end_date: End date to query for test history.
    default_to_fallback: Use the fallback method for splitting tasks rather than dynamic splitting.
    include_build_variant_in_name: Include the build variant as part of display task names.
    """

    evg_project: str
    target_resmoke_time: int
    max_sub_suites: int
    max_tests_per_suite: int
    start_date: datetime
    end_date: datetime
    default_to_fallback: bool = False
    include_build_variant_in_name: bool = False
|
|
|
|
|
|
class SuiteSplitService:
    """A service for splitting resmoke suites into sub-suites that can be run in parallel."""

    @inject.autoparams()
    def __init__(
            self,
            evg_api: EvergreenApi,
            resmoke_proxy: ResmokeProxyService,
            config: SuiteSplitConfig,
            split_strategy: SplitStrategy,
            fallback_strategy: FallbackStrategy,
    ) -> None:
        """
        Initialize the suite split service.

        :param evg_api: Evergreen API client.
        :param resmoke_proxy: Resmoke Proxy service.
        :param config: Configuration options of how to split suites.
        :param split_strategy: Strategy used to split tests when historic runtimes are available.
        :param fallback_strategy: Strategy used to split tests into a fixed number of suites.
        """
        self.evg_api = evg_api
        self.resmoke_proxy = resmoke_proxy
        self.config = config
        self.split_strategy = split_strategy
        self.fallback_strategy = fallback_strategy

    def split_suite(self, params: SuiteSplitParameters) -> GeneratedSuite:
        """
        Split the given resmoke suite into multiple sub-suites.

        :param params: Description of suite to split.
        :return: List of sub-suites from the given suite.
        :raises requests.HTTPError: If querying test history fails with anything but a 503.
        """
        if self.config.default_to_fallback:
            return self.calculate_fallback_suites(params)

        try:
            evg_stats = HistoricTaskData.from_evg(self.evg_api, self.config.evg_project,
                                                  self.config.start_date, self.config.end_date,
                                                  params.task_name, params.build_variant)
            if not evg_stats:
                LOGGER.debug("No test history, using fallback suites")
                # This is probably a new suite, since there is no test history, just use the
                # fallback values.
                return self.calculate_fallback_suites(params)
            return self.calculate_suites_from_evg_stats(evg_stats, params)
        except requests.HTTPError as err:
            # An HTTPError can be raised without a response attached; in that case there is no
            # status code to inspect, so propagate the original error instead of masking it.
            response = err.response
            if (response is not None
                    and response.status_code == requests.codes.SERVICE_UNAVAILABLE):
                # Evergreen may return a 503 when the service is degraded.
                # We fall back to splitting the tests into a fixed number of suites.
                LOGGER.warning("Received 503 from Evergreen, "
                               "dividing the tests evenly among suites")
                return self.calculate_fallback_suites(params)
            raise

    def calculate_fallback_suites(self, params: SuiteSplitParameters) -> GeneratedSuite:
        """
        Divide tests into a fixed number of suites.

        :param params: Description of suite to split.
        :return: Generated suite whose sub-suites carry no historic runtime information.
        """
        LOGGER.debug("Splitting tasks based on fallback", max_sub_suites=self.config.max_sub_suites)
        test_list = self.resmoke_proxy.list_tests(params.suite_name)
        if params.test_file_filter:
            test_list = [test for test in test_list if params.test_file_filter(test)]

        test_lists = self.fallback_strategy(test_list, self.config.max_sub_suites)
        return self.test_lists_to_suite(test_lists, params, [])

    def calculate_suites_from_evg_stats(self, test_stats: HistoricTaskData,
                                        params: SuiteSplitParameters) -> GeneratedSuite:
        """
        Divide tests into suites that can be run in less than the specified execution time.

        :param test_stats: Historical test results for task being split.
        :param params: Description of how to split the suite.
        :return: List of sub suites calculated.
        """
        # The configured target is multiplied by 60 before being handed to the split strategy.
        execution_time_secs = self.config.target_resmoke_time * 60
        tests_runtimes = self.filter_tests(test_stats.get_tests_runtimes(), params)
        if not tests_runtimes:
            LOGGER.debug("No test runtimes after filter, using fallback")
            return self.calculate_fallback_suites(params)

        test_lists = self.split_strategy(tests_runtimes, execution_time_secs,
                                         self.config.max_sub_suites,
                                         self.config.max_tests_per_suite,
                                         LOGGER.bind(task=params.task_name))

        return self.test_lists_to_suite(test_lists, params, tests_runtimes, test_stats)

    def test_lists_to_suite(self, test_lists: List[List[str]], params: SuiteSplitParameters,
                            tests_runtimes: List[TestRuntime],
                            test_stats: Optional[HistoricTaskData] = None) -> GeneratedSuite:
        """
        Create sub-suites for the given test lists.

        :param test_lists: List of tests lists to create suites for.
        :param params: Parameters for suite creation.
        :param tests_runtimes: Historic runtimes of tests.
        :param test_stats: Other historic task data.
        :return: Generated suite for the sub-suites specified.
        """
        sub_suites = []
        for test_list in test_lists:
            task_overhead = self.get_task_hook_overhead(params.suite_name, params.is_asan,
                                                        len(test_list), test_stats)
            sub_suites.append(SubSuite(test_list, task_overhead, tests_runtimes))

        task_name = params.task_name
        if self.config.include_build_variant_in_name:
            task_name = f"{params.task_name}_{params.build_variant}"

        return GeneratedSuite(
            sub_suites=sub_suites,
            build_variant=params.build_variant,
            task_name=task_name,
            suite_name=params.suite_name,
        )

    def filter_tests(self, tests_runtimes: List[TestRuntime],
                     params: SuiteSplitParameters) -> List[TestRuntime]:
        """
        Filter out tests that do not exist in the filesystem.

        Tests rejected by the optional file filter, or that are not part of the suite as listed
        by resmoke, are dropped as well.

        :param tests_runtimes: List of tests with runtimes to filter.
        :param params: Suite split parameters.
        :return: Test list with unneeded tests filtered out.
        """
        if params.test_file_filter:
            tests_runtimes = [
                test for test in tests_runtimes if params.test_file_filter(test.test_name)
            ]

        # Build the lookup once as a set so each membership check is constant time.
        all_tests = {
            normalize_test_name(test)
            for test in self.resmoke_proxy.list_tests(params.suite_name)
        }

        # Do the cheap in-memory check first so the filesystem is only consulted for tests that
        # actually belong to the suite.
        return [
            info for info in tests_runtimes
            if info.test_name in all_tests and os.path.exists(info.test_name)
        ]

    def get_task_hook_overhead(self, suite_name: str, is_asan: bool, test_count: int,
                               historic_stats: Optional[HistoricTaskData]) -> float:
        """
        Add how much overhead task-level hooks each suite should account for.

        Certain test hooks need to be accounted for on the task level instead of the test level
        in order to calculate accurate timeouts. So we will add details about those hooks to
        each suite here.

        :param suite_name: Name of suite being generated.
        :param is_asan: Whether ASAN is being used.
        :param test_count: Number of tests in sub-suite.
        :param historic_stats: Historic runtime data of the suite.
        :return: Expected hook overhead, or 0.0 when there is no historic data for the hook.
        """
        # The CleanEveryN hook is run every 'N' tests. The runtime of the
        # hook will be associated with whichever test happens to be running, which could be
        # different every run. So we need to take its runtime into account at the task level.
        if historic_stats is None:
            return 0.0

        clean_every_n_cadence = self._get_clean_every_n_cadence(suite_name, is_asan)
        avg_clean_every_n_runtime = historic_stats.get_avg_hook_runtime(CLEAN_EVERY_N_HOOK)
        LOGGER.debug("task hook overhead", cadence=clean_every_n_cadence,
                     runtime=avg_clean_every_n_runtime, is_asan=is_asan)
        if avg_clean_every_n_runtime != 0:
            n_expected_runs = test_count / clean_every_n_cadence
            return n_expected_runs * avg_clean_every_n_runtime
        return 0.0

    def _get_clean_every_n_cadence(self, suite_name: str, is_asan: bool) -> int:
        """
        Get the N value for the CleanEveryN hook.

        :param suite_name: Name of suite being generated.
        :param is_asan: Whether ASAN is being used.
        :return: How frequently the CleanEveryN hook is run.
        """
        # Default to 1, which is the worst case meaning CleanEveryN would run for every test.
        clean_every_n_cadence = 1
        if is_asan:
            # ASAN runs hard-code N to 1. See `resmokelib/testing/hooks/cleanup.py`.
            return clean_every_n_cadence

        clean_every_n_config = self._get_hook_config(suite_name, CLEAN_EVERY_N_HOOK)
        if clean_every_n_config:
            clean_every_n_cadence = clean_every_n_config.get("n", 1)

        return clean_every_n_cadence

    def _get_hook_config(self, suite_name: str, hook_name: str) -> Optional[Dict[str, Any]]:
        """
        Get the configuration for the given hook.

        :param suite_name: Name of suite whose configuration should be read.
        :param hook_name: Name of hook to query.
        :return: Configuration for hook, if it exists.
        """
        hooks_config = self.resmoke_proxy.read_suite_config(suite_name).get("executor",
                                                                            {}).get("hooks")
        if hooks_config:
            # The first hook entry whose "class" matches is returned.
            for hook in hooks_config:
                if hook.get("class") == hook_name:
                    return hook

        return None
|