mirror of https://github.com/mongodb/mongo
406 lines
18 KiB
Python
Executable File
406 lines
18 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""Determine the timeout value a task should use in evergreen."""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import math
|
|
import os
|
|
import shlex
|
|
import sys
|
|
from datetime import timedelta
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional
|
|
|
|
import inject
|
|
import structlog
|
|
import yaml
|
|
from pydantic import BaseModel
|
|
from evergreen import EvergreenApi, RetryingEvergreenApi
|
|
|
|
from buildscripts.ciconfig.evergreen import (EvergreenProjectConfig, parse_evergreen_file)
|
|
from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService
|
|
from buildscripts.timeouts.timeout_service import (TimeoutParams, TimeoutService)
|
|
from buildscripts.util.cmdutils import enable_logging
|
|
from buildscripts.util.taskname import determine_task_base_name
|
|
|
|
LOGGER = structlog.get_logger(__name__)
|
|
DEFAULT_TIMEOUT_OVERRIDES = "etc/evergreen_timeouts.yml"
|
|
DEFAULT_EVERGREEN_CONFIG = "etc/evergreen.yml"
|
|
DEFAULT_EVERGREEN_AUTH_CONFIG = "~/.evergreen.yml"
|
|
COMMIT_QUEUE_ALIAS = "__commit_queue"
|
|
IGNORED_SUITES = {
|
|
"integration_tests_replset",
|
|
"integration_tests_replset_ssl_auth",
|
|
"integration_tests_sharded",
|
|
"integration_tests_standalone",
|
|
"integration_tests_standalone_audit",
|
|
"mongos_test",
|
|
"server_selection_json_test",
|
|
"sdam_json_test",
|
|
}
|
|
HISTORY_LOOKBACK = timedelta(weeks=2)
|
|
|
|
COMMIT_QUEUE_TIMEOUT = timedelta(minutes=20)
|
|
DEFAULT_REQUIRED_BUILD_TIMEOUT = timedelta(hours=1, minutes=20)
|
|
DEFAULT_NON_REQUIRED_BUILD_TIMEOUT = timedelta(hours=2)
|
|
|
|
|
|
class TimeoutOverride(BaseModel):
|
|
"""
|
|
Specification for overriding a task timeout.
|
|
|
|
* task: Name of task to overide.
|
|
* exec_timeout: Value to override exec timeout with.
|
|
* idle_timeout: Value to override idle timeout with.
|
|
"""
|
|
|
|
task: str
|
|
exec_timeout: Optional[int] = None
|
|
idle_timeout: Optional[int] = None
|
|
|
|
@classmethod
|
|
def from_seconds(cls, task: str, exec_timeout_secs: Optional[float],
|
|
idle_timeout_secs: Optional[float]) -> TimeoutOverride:
|
|
"""Create an instance of an override from seconds."""
|
|
exec_timeout = exec_timeout_secs / 60 if exec_timeout_secs else None
|
|
idle_timeout = idle_timeout_secs / 60 if idle_timeout_secs else None
|
|
return cls(
|
|
task=task,
|
|
exec_timeout=exec_timeout,
|
|
idle_timeout=idle_timeout,
|
|
)
|
|
|
|
def get_exec_timeout(self) -> Optional[timedelta]:
|
|
"""Get a timedelta of the exec timeout to use."""
|
|
if self.exec_timeout is not None:
|
|
return timedelta(minutes=self.exec_timeout)
|
|
return None
|
|
|
|
def get_idle_timeout(self) -> Optional[timedelta]:
|
|
"""Get a timedelta of the idle timeout to use."""
|
|
if self.idle_timeout is not None:
|
|
return timedelta(minutes=self.idle_timeout)
|
|
return None
|
|
|
|
|
|
class TimeoutOverrides(BaseModel):
|
|
"""Collection of timeout overrides to apply."""
|
|
|
|
overrides: Dict[str, List[TimeoutOverride]]
|
|
|
|
@classmethod
|
|
def from_yaml_file(cls, file_path: Path) -> "TimeoutOverrides":
|
|
"""Read the timeout overrides from the given file."""
|
|
with open(file_path) as file_handler:
|
|
return cls(**yaml.safe_load(file_handler))
|
|
|
|
def _lookup_override(self, build_variant: str, task_name: str) -> Optional[TimeoutOverride]:
|
|
"""
|
|
Check if the given task on the given build variant has an override defined.
|
|
|
|
Note: If multiple overrides are found, an exception will be raised.
|
|
|
|
:param build_variant: Build Variant to check.
|
|
:param task_name: Task name to check.
|
|
:return: Timeout override if found.
|
|
"""
|
|
overrides = [
|
|
override for override in self.overrides.get(build_variant, [])
|
|
if override.task == task_name
|
|
]
|
|
if overrides:
|
|
if len(overrides) > 1:
|
|
LOGGER.error("Found multiple overrides for the same task",
|
|
build_variant=build_variant, task=task_name,
|
|
overrides=[override.dict() for override in overrides])
|
|
raise ValueError(f"Found multiple overrides for '{task_name}' on '{build_variant}'")
|
|
return overrides[0]
|
|
return None
|
|
|
|
def lookup_exec_override(self, build_variant: str, task_name: str) -> Optional[timedelta]:
|
|
"""
|
|
Look up the exec timeout override of the given build variant/task.
|
|
|
|
:param build_variant: Build Variant to check.
|
|
:param task_name: Task name to check.
|
|
:return: Exec timeout override if found.
|
|
"""
|
|
override = self._lookup_override(build_variant, task_name)
|
|
if override is not None:
|
|
return override.get_exec_timeout()
|
|
return None
|
|
|
|
def lookup_idle_override(self, build_variant: str, task_name: str) -> Optional[timedelta]:
|
|
"""
|
|
Look up the idle timeout override of the given build variant/task.
|
|
|
|
:param build_variant: Build Variant to check.
|
|
:param task_name: Task name to check.
|
|
:return: Idle timeout override if found.
|
|
"""
|
|
override = self._lookup_override(build_variant, task_name)
|
|
if override is not None:
|
|
return override.get_idle_timeout()
|
|
return None
|
|
|
|
|
|
def output_timeout(exec_timeout: timedelta, idle_timeout: Optional[timedelta],
|
|
output_file: Optional[str]) -> None:
|
|
"""
|
|
Output timeout configuration to the specified location.
|
|
|
|
:param exec_timeout: Exec timeout to output.
|
|
:param idle_timeout: Idle timeout to output.
|
|
:param output_file: Location of output file to write.
|
|
"""
|
|
output = {
|
|
"exec_timeout_secs": math.ceil(exec_timeout.total_seconds()),
|
|
}
|
|
if idle_timeout is not None:
|
|
output["timeout_secs"] = math.ceil(idle_timeout.total_seconds())
|
|
|
|
if output_file:
|
|
with open(output_file, "w") as outfile:
|
|
yaml.dump(output, stream=outfile, default_flow_style=False)
|
|
|
|
yaml.dump(output, stream=sys.stdout, default_flow_style=False)
|
|
|
|
|
|
class TaskTimeoutOrchestrator:
|
|
"""An orchestrator for determining task timeouts."""
|
|
|
|
@inject.autoparams()
|
|
def __init__(self, timeout_service: TimeoutService, timeout_overrides: TimeoutOverrides,
|
|
evg_project_config: EvergreenProjectConfig) -> None:
|
|
"""
|
|
Initialize the orchestrator.
|
|
|
|
:param timeout_service: Service for calculating historic timeouts.
|
|
:param timeout_overrides: Timeout overrides for specific tasks.
|
|
:param evg_project_config: Evergreen project configuration.
|
|
"""
|
|
self.timeout_service = timeout_service
|
|
self.timeout_overrides = timeout_overrides
|
|
self.evg_project_config = evg_project_config
|
|
|
|
def determine_exec_timeout(self, task_name: str, variant: str,
|
|
idle_timeout: Optional[timedelta] = None,
|
|
exec_timeout: Optional[timedelta] = None, evg_alias: str = "",
|
|
historic_timeout: Optional[timedelta] = None) -> timedelta:
|
|
"""
|
|
Determine what exec timeout should be used.
|
|
|
|
:param task_name: Name of task being run.
|
|
:param variant: Name of build variant being run.
|
|
:param idle_timeout: Idle timeout if specified.
|
|
:param exec_timeout: Override to use for exec_timeout or 0 if no override.
|
|
:param evg_alias: Evergreen alias running the task.
|
|
:param historic_timeout: Timeout determined by looking at previous task executions.
|
|
:return: Exec timeout to use for running task.
|
|
"""
|
|
determined_timeout = DEFAULT_NON_REQUIRED_BUILD_TIMEOUT
|
|
if historic_timeout is not None:
|
|
determined_timeout = historic_timeout
|
|
|
|
override = self.timeout_overrides.lookup_exec_override(variant, task_name)
|
|
|
|
if exec_timeout and exec_timeout.total_seconds() != 0:
|
|
LOGGER.info("Using timeout from cmd line",
|
|
exec_timeout_secs=exec_timeout.total_seconds())
|
|
determined_timeout = exec_timeout
|
|
|
|
elif override is not None:
|
|
LOGGER.info("Overriding configured timeout", exec_timeout_secs=override.total_seconds())
|
|
determined_timeout = override
|
|
|
|
elif self._is_required_build_variant(
|
|
variant) and determined_timeout > DEFAULT_REQUIRED_BUILD_TIMEOUT:
|
|
LOGGER.info("Overriding required-builder timeout",
|
|
exec_timeout_secs=DEFAULT_REQUIRED_BUILD_TIMEOUT.total_seconds())
|
|
determined_timeout = DEFAULT_REQUIRED_BUILD_TIMEOUT
|
|
|
|
elif evg_alias == COMMIT_QUEUE_ALIAS:
|
|
LOGGER.info("Overriding commit-queue timeout",
|
|
exec_timeout_secs=COMMIT_QUEUE_TIMEOUT.total_seconds())
|
|
determined_timeout = COMMIT_QUEUE_TIMEOUT
|
|
|
|
# The timeout needs to be at least as large as the idle timeout.
|
|
if idle_timeout and determined_timeout.total_seconds() < idle_timeout.total_seconds():
|
|
LOGGER.info("Making exec timeout as large as idle timeout",
|
|
exec_timeout_secs=idle_timeout.total_seconds())
|
|
return idle_timeout
|
|
|
|
return determined_timeout
|
|
|
|
def determine_idle_timeout(self, task_name: str, variant: str,
|
|
idle_timeout: Optional[timedelta] = None,
|
|
historic_timeout: Optional[timedelta] = None) -> Optional[timedelta]:
|
|
"""
|
|
Determine what idle timeout should be used.
|
|
|
|
:param task_name: Name of task being run.
|
|
:param variant: Name of build variant being run.
|
|
:param idle_timeout: Override to use for idle_timeout.
|
|
:param historic_timeout: Timeout determined by looking at previous task executions.
|
|
:return: Idle timeout to use for running task.
|
|
"""
|
|
determined_timeout = historic_timeout
|
|
|
|
override = self.timeout_overrides.lookup_idle_override(variant, task_name)
|
|
|
|
if idle_timeout and idle_timeout.total_seconds() != 0:
|
|
LOGGER.info("Using timeout from cmd line",
|
|
idle_timeout_secs=idle_timeout.total_seconds())
|
|
determined_timeout = idle_timeout
|
|
|
|
elif override is not None:
|
|
LOGGER.info("Overriding configured timeout", idle_timeout_secs=override.total_seconds())
|
|
determined_timeout = override
|
|
|
|
return determined_timeout
|
|
|
|
def determine_historic_timeout(self, project: str, task: str, variant: str, suite_name: str,
|
|
exec_timeout_factor: Optional[float]) -> TimeoutOverride:
|
|
"""
|
|
Calculate the timeout based on historic test results.
|
|
|
|
:param project: Name of project to query.
|
|
:param task: Name of task to query.
|
|
:param variant: Name of build variant to query.
|
|
:param suite_name: Name of test suite being run.
|
|
:param exec_timeout_factor: Scaling factor to use when determining timeout.
|
|
"""
|
|
if suite_name in IGNORED_SUITES:
|
|
return TimeoutOverride(task=task, exec_timeout=None, idle_timeout=None)
|
|
|
|
timeout_params = TimeoutParams(
|
|
evg_project=project,
|
|
build_variant=variant,
|
|
task_name=task,
|
|
suite_name=suite_name,
|
|
is_asan=self.is_build_variant_asan(variant),
|
|
)
|
|
timeout_estimate = self.timeout_service.get_timeout_estimate(timeout_params)
|
|
if timeout_estimate and timeout_estimate.is_specified():
|
|
exec_timeout = timeout_estimate.calculate_task_timeout(
|
|
repeat_factor=1, scaling_factor=exec_timeout_factor)
|
|
idle_timeout = timeout_estimate.calculate_test_timeout(repeat_factor=1)
|
|
if exec_timeout is not None or idle_timeout is not None:
|
|
LOGGER.info("Getting historic based timeout", exec_timeout_secs=exec_timeout,
|
|
idle_timeout_secs=idle_timeout)
|
|
return TimeoutOverride.from_seconds(task, exec_timeout, idle_timeout)
|
|
return TimeoutOverride(task=task, exec_timeout=None, idle_timeout=None)
|
|
|
|
def is_build_variant_asan(self, build_variant: str) -> bool:
|
|
"""
|
|
Determine if the given build variant is an ASAN build variant.
|
|
|
|
:param build_variant: Name of build variant to check.
|
|
:return: True if build variant is an ASAN build variant.
|
|
"""
|
|
bv = self.evg_project_config.get_variant(build_variant)
|
|
return bv.is_asan_build()
|
|
|
|
def _is_required_build_variant(self, build_variant: str) -> bool:
|
|
"""
|
|
Determine if the given build variants is a required build variant.
|
|
|
|
:param build_variant: Name of build variant to check.
|
|
:param evergreen_project_config: Evergreen config to query the variant name.
|
|
:return: True if the given build variant is required.
|
|
"""
|
|
bv = self.evg_project_config.get_variant(build_variant)
|
|
return "!" in bv.display_name
|
|
|
|
def determine_timeouts(self, cli_idle_timeout: Optional[timedelta],
|
|
cli_exec_timeout: Optional[timedelta], outfile: Optional[str],
|
|
project: str, task: str, variant: str, evg_alias: str, suite_name: str,
|
|
exec_timeout_factor: Optional[float]) -> None:
|
|
"""
|
|
Determine the timeouts to use for the given task and write timeouts to expansion file.
|
|
|
|
:param cli_idle_timeout: Idle timeout specified by the CLI.
|
|
:param cli_exec_timeout: Exec timeout specified by the CLI.
|
|
:param outfile: File to write timeout expansions to.
|
|
:param project: Evergreen project task is being run on.
|
|
:param task: Name of task.
|
|
:param variant: Build variant task is being run on.
|
|
:param evg_alias: Evergreen alias that triggered task.
|
|
:param suite_name: Name of evergreen suite being run.
|
|
:param exec_timeout_factor: Scaling factor to use when determining timeout.
|
|
"""
|
|
historic_timeout = self.determine_historic_timeout(project, task, variant, suite_name,
|
|
exec_timeout_factor)
|
|
|
|
idle_timeout = self.determine_idle_timeout(task, variant, cli_idle_timeout,
|
|
historic_timeout.get_idle_timeout())
|
|
exec_timeout = self.determine_exec_timeout(task, variant, idle_timeout, cli_exec_timeout,
|
|
evg_alias, historic_timeout.get_exec_timeout())
|
|
|
|
output_timeout(exec_timeout, idle_timeout, outfile)
|
|
|
|
|
|
def main():
|
|
"""Determine the timeout value a task should use in evergreen."""
|
|
parser = argparse.ArgumentParser(description=main.__doc__)
|
|
|
|
parser.add_argument("--install-dir", dest="install_dir", required=True,
|
|
help="Path to bin directory of testable installation")
|
|
parser.add_argument("--task-name", dest="task", required=True, help="Task being executed.")
|
|
parser.add_argument("--suite-name", dest="suite_name", required=True,
|
|
help="Resmoke suite being run against.")
|
|
parser.add_argument("--build-variant", dest="variant", required=True,
|
|
help="Build variant task is being executed on.")
|
|
parser.add_argument("--project", dest="project", required=True,
|
|
help="Evergreen project task is being executed on.")
|
|
parser.add_argument("--evg-alias", dest="evg_alias", required=True,
|
|
help="Evergreen alias used to trigger build.")
|
|
parser.add_argument("--timeout", dest="timeout", type=int, help="Timeout to use (in sec).")
|
|
parser.add_argument("--exec-timeout", dest="exec_timeout", type=int,
|
|
help="Exec timeout to use (in sec).")
|
|
parser.add_argument("--exec-timeout-factor", dest="exec_timeout_factor", type=float,
|
|
help="Exec timeout factor to use (in sec).")
|
|
parser.add_argument("--out-file", dest="outfile", help="File to write configuration to.")
|
|
parser.add_argument("--timeout-overrides", dest="timeout_overrides_file",
|
|
default=DEFAULT_TIMEOUT_OVERRIDES,
|
|
help="File containing timeout overrides to use.")
|
|
parser.add_argument("--evg-api-config", dest="evg_api_config",
|
|
default=DEFAULT_EVERGREEN_AUTH_CONFIG, help="Evergreen API config file.")
|
|
parser.add_argument("--evg-project-config", dest="evg_project_config",
|
|
default=DEFAULT_EVERGREEN_CONFIG, help="Evergreen project config file.")
|
|
|
|
options = parser.parse_args()
|
|
|
|
timeout_override = timedelta(seconds=options.timeout) if options.timeout else None
|
|
exec_timeout_override = timedelta(
|
|
seconds=options.exec_timeout) if options.exec_timeout else None
|
|
|
|
task_name = determine_task_base_name(options.task, options.variant)
|
|
timeout_overrides = TimeoutOverrides.from_yaml_file(
|
|
os.path.expanduser(options.timeout_overrides_file))
|
|
|
|
enable_logging(verbose=False)
|
|
LOGGER.info("Determining timeouts", cli_args=options)
|
|
|
|
def dependencies(binder: inject.Binder) -> None:
|
|
binder.bind(
|
|
EvergreenApi,
|
|
RetryingEvergreenApi.get_api(config_file=os.path.expanduser(options.evg_api_config)))
|
|
binder.bind(TimeoutOverrides, timeout_overrides)
|
|
binder.bind(EvergreenProjectConfig,
|
|
parse_evergreen_file(os.path.expanduser(options.evg_project_config)))
|
|
binder.bind(
|
|
ResmokeProxyService,
|
|
ResmokeProxyService(run_options=f"--installDir={shlex.quote(options.install_dir)}"))
|
|
|
|
inject.configure(dependencies)
|
|
|
|
task_timeout_orchestrator = inject.instance(TaskTimeoutOrchestrator)
|
|
task_timeout_orchestrator.determine_timeouts(
|
|
timeout_override, exec_timeout_override, options.outfile, options.project, task_name,
|
|
options.variant, options.evg_alias, options.suite_name, options.exec_timeout_factor)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|