mirror of https://github.com/mongodb/mongo
SERVER-63827: Determine task timeout at the start of task execution
This commit is contained in:
parent
1e72f6d25e
commit
0a079df588
|
|
@ -3,14 +3,35 @@
|
|||
|
||||
import argparse
|
||||
import math
|
||||
import os
|
||||
import sys
|
||||
from datetime import timedelta
|
||||
from typing import Optional
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import inject
|
||||
import structlog
|
||||
import yaml
|
||||
from pydantic import BaseModel
|
||||
from evergreen import EvergreenApi, RetryingEvergreenApi
|
||||
|
||||
from buildscripts.ciconfig.evergreen import (EvergreenProjectConfig, parse_evergreen_file)
|
||||
from buildscripts.timeouts.timeout_service import (TimeoutParams, TimeoutService, TimeoutSettings)
|
||||
from buildscripts.util.cmdutils import enable_logging
|
||||
from buildscripts.util.taskname import determine_task_base_name
|
||||
|
||||
LOGGER = structlog.get_logger(__name__)
|
||||
DEFAULT_TIMEOUT_OVERRIDES = "etc/evergreen_timeouts.yml"
|
||||
DEFAULT_EVERGREEN_CONFIG = "etc/evergreen.yml"
|
||||
DEFAULT_EVERGREEN_AUTH_CONFIG = "~/.evergreen.yml"
|
||||
COMMIT_QUEUE_ALIAS = "__commit_queue"
|
||||
UNITTEST_TASK = "run_unittests"
|
||||
IGNORED_SUITES = {
|
||||
"integration_tests_replset", "integration_tests_replset_ssl_auth", "integration_tests_sharded",
|
||||
"integration_tests_standalone", "integration_tests_standalone_audit", "mongos_test",
|
||||
"server_selection_json_test"
|
||||
}
|
||||
HISTORY_LOOKBACK = timedelta(weeks=2)
|
||||
|
||||
COMMIT_QUEUE_TIMEOUT = timedelta(minutes=40)
|
||||
DEFAULT_REQUIRED_BUILD_TIMEOUT = timedelta(hours=1, minutes=20)
|
||||
|
|
@ -19,45 +40,92 @@ DEFAULT_NON_REQUIRED_BUILD_TIMEOUT = timedelta(hours=2)
|
|||
# which is 5 mins 47 secs, excluding outliers below
|
||||
UNITTESTS_TIMEOUT = timedelta(minutes=12)
|
||||
|
||||
SPECIFIC_TASK_OVERRIDES = {
|
||||
"linux-64-debug": {"auth": timedelta(minutes=60)},
|
||||
"enterprise-windows-all-feature-flags-suggested": {
|
||||
"replica_sets_jscore_passthrough": timedelta(hours=3),
|
||||
"replica_sets_update_v1_oplog_jscore_passthrough": timedelta(hours=2, minutes=30),
|
||||
},
|
||||
"enterprise-windows-required": {
|
||||
"replica_sets_jscore_passthrough": timedelta(hours=3),
|
||||
"replica_sets_update_v1_oplog_jscore_passthrough": timedelta(hours=2, minutes=30),
|
||||
},
|
||||
"enterprise-windows-inmem": {"replica_sets_jscore_passthrough": timedelta(hours=3), },
|
||||
"enterprise-windows": {"replica_sets_jscore_passthrough": timedelta(hours=3), },
|
||||
"windows-debug-suggested": {
|
||||
"replica_sets_initsync_jscore_passthrough": timedelta(hours=2, minutes=30),
|
||||
"replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30),
|
||||
"replica_sets_update_v1_oplog_jscore_passthrough": timedelta(hours=2, minutes=30),
|
||||
},
|
||||
"windows": {
|
||||
"replica_sets": timedelta(hours=3),
|
||||
"replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30),
|
||||
},
|
||||
"ubuntu1804-debug-suggested": {"replica_sets_jscore_passthrough": timedelta(hours=3), },
|
||||
"enterprise-rhel-80-64-bit-coverage": {
|
||||
"replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30),
|
||||
},
|
||||
"macos": {"replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30), },
|
||||
"enterprise-macos": {"replica_sets_jscore_passthrough": timedelta(hours=2, minutes=30), },
|
||||
|
||||
# unittests outliers
|
||||
# repeated execution runs a suite 10 times
|
||||
"linux-64-repeated-execution": {UNITTEST_TASK: 10 * UNITTESTS_TIMEOUT},
|
||||
# some of the a/ub/t san variants need a little extra time
|
||||
"enterprise-ubuntu2004-debug-tsan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
|
||||
"ubuntu1804-asan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
|
||||
"ubuntu1804-ubsan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
|
||||
"ubuntu1804-debug-asan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
|
||||
"ubuntu1804-debug-aubsan-lite": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
|
||||
"ubuntu1804-debug-ubsan": {UNITTEST_TASK: 2 * UNITTESTS_TIMEOUT},
|
||||
}
|
||||
class TimeoutOverride(BaseModel):
    """
    Timeout override for a single task.

    * task: Name of task to override.
    * exec_timeout: Number of minutes to override the exec timeout with.
    * idle_timeout: Number of minutes to override the idle timeout with.
    """

    task: str
    exec_timeout: Optional[int] = None
    idle_timeout: Optional[int] = None

    def get_exec_timeout(self) -> Optional[timedelta]:
        """Return the exec timeout as a timedelta, or None when it is not set."""
        minutes = self.exec_timeout
        return None if minutes is None else timedelta(minutes=minutes)

    def get_idle_timeout(self) -> Optional[timedelta]:
        """Return the idle timeout as a timedelta, or None when it is not set."""
        minutes = self.idle_timeout
        return None if minutes is None else timedelta(minutes=minutes)
|
||||
|
||||
|
||||
class TimeoutOverrides(BaseModel):
    """Timeout overrides to apply, keyed by build variant name."""

    overrides: Dict[str, List[TimeoutOverride]]

    @classmethod
    def from_yaml_file(cls, file_path: Path) -> "TimeoutOverrides":
        """
        Build the collection of overrides from a YAML file.

        :param file_path: Location of the YAML file to read.
        :return: Timeout overrides described by the file.
        """
        with open(file_path) as yaml_file:
            contents = yaml.safe_load(yaml_file)
        return cls(**contents)

    def _lookup_override(self, build_variant: str, task_name: str) -> Optional[TimeoutOverride]:
        """
        Find the override registered for a task on a build variant.

        :param build_variant: Build Variant to check.
        :param task_name: Task name to check.
        :return: The matching override, or None when there is none.
        :raises ValueError: When more than one override matches the task.
        """
        matches = [
            candidate for candidate in self.overrides.get(build_variant, [])
            if candidate.task == task_name
        ]
        if not matches:
            return None

        if len(matches) > 1:
            # Ambiguous configuration: report every conflicting entry before failing.
            LOGGER.error("Found multiple overrides for the same task",
                         build_variant=build_variant, task=task_name,
                         overrides=[match.dict() for match in matches])
            raise ValueError(f"Found multiple overrides for '{task_name}' on '{build_variant}'")

        return matches[0]

    def lookup_exec_override(self, build_variant: str, task_name: str) -> Optional[timedelta]:
        """
        Get the exec timeout override for a task on a build variant.

        :param build_variant: Build Variant to check.
        :param task_name: Task name to check.
        :return: Exec timeout override, or None when none is configured.
        """
        found = self._lookup_override(build_variant, task_name)
        return found.get_exec_timeout() if found is not None else None

    def lookup_idle_override(self, build_variant: str, task_name: str) -> Optional[timedelta]:
        """
        Get the idle timeout override for a task on a build variant.

        :param build_variant: Build Variant to check.
        :param task_name: Task name to check.
        :return: Idle timeout override, or None when none is configured.
        """
        found = self._lookup_override(build_variant, task_name)
        return found.get_idle_timeout() if found is not None else None
|
||||
|
||||
|
||||
def _is_required_build_variant(build_variant: str) -> bool:
|
||||
|
|
@ -70,63 +138,20 @@ def _is_required_build_variant(build_variant: str) -> bool:
|
|||
return build_variant.endswith("-required")
|
||||
|
||||
|
||||
def _has_override(variant: str, task_name: str) -> bool:
    """
    Check whether SPECIFIC_TASK_OVERRIDES holds an entry for the task on the variant.

    :param variant: Build Variant task is running on.
    :param task_name: Task to check.
    :return: True if an override exists for the task.
    """
    # A variant without any overrides behaves like an empty mapping of tasks.
    return task_name in SPECIFIC_TASK_OVERRIDES.get(variant, {})
|
||||
|
||||
|
||||
def determine_timeout(task_name: str, variant: str, idle_timeout: Optional[timedelta] = None,
                      exec_timeout: Optional[timedelta] = None, evg_alias: str = '') -> timedelta:
    """
    Pick the exec timeout for a task.

    Sources are consulted in this order: an explicit non-zero exec_timeout, the unittest
    default (when the task has no specific override), the commit-queue timeout, a specific
    task override, the required-variant default, and finally the non-required default.

    :param task_name: Name of task being run.
    :param variant: Name of build variant being run.
    :param idle_timeout: Idle timeout if specified.
    :param exec_timeout: Override to use for exec_timeout or 0 if no override.
    :param evg_alias: Evergreen alias running the task.
    :return: Exec timeout to use for running task.
    """

    def _pick_base_timeout() -> timedelta:
        """Return the first applicable timeout, ignoring the idle timeout."""
        if exec_timeout and exec_timeout.total_seconds() != 0:
            return exec_timeout
        if task_name == UNITTEST_TASK and not _has_override(variant, task_name):
            return UNITTESTS_TIMEOUT
        if evg_alias == COMMIT_QUEUE_ALIAS:
            return COMMIT_QUEUE_TIMEOUT
        if _has_override(variant, task_name):
            return SPECIFIC_TASK_OVERRIDES[variant][task_name]
        if _is_required_build_variant(variant):
            return DEFAULT_REQUIRED_BUILD_TIMEOUT
        return DEFAULT_NON_REQUIRED_BUILD_TIMEOUT

    base_timeout = _pick_base_timeout()

    # The timeout needs to be at least as large as the idle timeout.
    if idle_timeout and base_timeout.total_seconds() < idle_timeout.total_seconds():
        return idle_timeout

    return base_timeout
|
||||
|
||||
|
||||
def output_timeout(task_timeout: timedelta, output_file: Optional[str]) -> None:
|
||||
def output_timeout(exec_timeout: timedelta, idle_timeout: Optional[timedelta],
|
||||
output_file: Optional[str]) -> None:
|
||||
"""
|
||||
Output timeout configuration to the specified location.
|
||||
|
||||
:param task_timeout: Timeout to output.
|
||||
:param exec_timeout: Exec timeout to output.
|
||||
:param idle_timeout: Idle timeout to output.
|
||||
:param output_file: Location of output file to write.
|
||||
"""
|
||||
output = {
|
||||
"exec_timeout_secs": math.ceil(task_timeout.total_seconds()),
|
||||
"exec_timeout_secs": math.ceil(exec_timeout.total_seconds()),
|
||||
}
|
||||
if idle_timeout is not None:
|
||||
output["timeout_secs"] = math.ceil(idle_timeout.total_seconds())
|
||||
|
||||
if output_file:
|
||||
with open(output_file, "w") as outfile:
|
||||
|
|
@ -135,28 +160,216 @@ def output_timeout(task_timeout: timedelta, output_file: Optional[str]) -> None:
|
|||
yaml.dump(output, stream=sys.stdout, default_flow_style=False)
|
||||
|
||||
|
||||
class TaskTimeoutOrchestrator:
    """Coordinates the different sources used to decide a task's timeouts."""

    @inject.autoparams()
    def __init__(self, timeout_service: TimeoutService, timeout_overrides: TimeoutOverrides,
                 evg_project_config: EvergreenProjectConfig) -> None:
        """
        Initialize the orchestrator.

        :param timeout_service: Service for calculating historic timeouts.
        :param timeout_overrides: Timeout overrides for specific tasks.
        :param evg_project_config: Evergreen project configuration used to look up build variants.
        """
        self.timeout_service = timeout_service
        self.timeout_overrides = timeout_overrides
        self.evg_project_config = evg_project_config

    def determine_exec_timeout(
            self, task_name: str, variant: str, idle_timeout: Optional[timedelta] = None,
            exec_timeout: Optional[timedelta] = None, evg_alias: str = "") -> timedelta:
        """
        Pick the exec timeout for a task.

        Sources are consulted in this order: an explicit non-zero exec_timeout, the unittest
        default (when no override is configured), the commit-queue timeout, a configured
        override, the required-variant default, and finally the non-required default.

        :param task_name: Name of task being run.
        :param variant: Name of build variant being run.
        :param idle_timeout: Idle timeout if specified.
        :param exec_timeout: Override to use for exec_timeout or 0 if no override.
        :param evg_alias: Evergreen alias running the task.
        :return: Exec timeout to use for running task.
        """
        # Looked up first so a duplicated override is reported regardless of the branch taken.
        override = self.timeout_overrides.lookup_exec_override(variant, task_name)

        message = None
        chosen = DEFAULT_NON_REQUIRED_BUILD_TIMEOUT
        if exec_timeout and exec_timeout.total_seconds() != 0:
            message, chosen = "Using timeout from cmd line", exec_timeout
        elif task_name == UNITTEST_TASK and override is None:
            message, chosen = "Overriding unittest timeout", UNITTESTS_TIMEOUT
        elif evg_alias == COMMIT_QUEUE_ALIAS:
            message, chosen = "Overriding commit-queue timeout", COMMIT_QUEUE_TIMEOUT
        elif override is not None:
            message, chosen = "Overriding configured timeout", override
        elif _is_required_build_variant(variant):
            message, chosen = "Overriding required-builder timeout", DEFAULT_REQUIRED_BUILD_TIMEOUT

        # Nothing is logged when the non-required default is kept.
        if message is not None:
            LOGGER.info(message, exec_timeout_secs=chosen.total_seconds())

        # The timeout needs to be at least as large as the idle timeout.
        if idle_timeout and chosen.total_seconds() < idle_timeout.total_seconds():
            LOGGER.info("Making exec timeout as large as idle timeout",
                        exec_timeout_secs=idle_timeout.total_seconds())
            return idle_timeout

        return chosen

    def determine_idle_timeout(self, task_name: str, variant: str,
                               idle_timeout: Optional[timedelta] = None) -> Optional[timedelta]:
        """
        Pick the idle timeout for a task.

        An explicit non-zero idle_timeout wins over a configured override.

        :param task_name: Name of task being run.
        :param variant: Name of build variant being run.
        :param idle_timeout: Override to use for idle_timeout.
        :return: Idle timeout to use for running task, or None when nothing specifies one.
        """
        override = self.timeout_overrides.lookup_idle_override(variant, task_name)

        if idle_timeout and idle_timeout.total_seconds() != 0:
            LOGGER.info("Using timeout from cmd line",
                        idle_timeout_secs=idle_timeout.total_seconds())
            return idle_timeout

        if override is not None:
            LOGGER.info("Overriding configured timeout", idle_timeout_secs=override.total_seconds())
            return override

        return None

    def determine_historic_timeout(self, task: str, variant: str, suite_name: str,
                                   exec_timeout_factor: Optional[float]) -> Optional[timedelta]:
        """
        Derive an exec timeout from the timeout service's estimate.

        :param task: Name of task to query.
        :param variant: Name of build variant to query.
        :param suite_name: Name of test suite being run.
        :param exec_timeout_factor: Scaling factor to use when determining timeout.
        :return: Timeout based on the estimate, or None when the suite is ignored or no
            usable estimate is available.
        """
        if suite_name in IGNORED_SUITES:
            return None

        params = TimeoutParams(
            evg_project="mongodb-mongo-master",
            build_variant=variant,
            task_name=task,
            suite_name=suite_name,
            is_asan=self.is_build_variant_asan(variant),
        )
        estimate = self.timeout_service.get_timeout_estimate(params)
        if not estimate or not estimate.is_specified():
            return None

        exec_timeout = estimate.calculate_task_timeout(repeat_factor=1,
                                                       scaling_factor=exec_timeout_factor)
        if exec_timeout is None:
            return None

        LOGGER.info("Using historic based timeout", exec_timeout_secs=exec_timeout)
        return timedelta(seconds=exec_timeout)

    def is_build_variant_asan(self, build_variant: str) -> bool:
        """
        Report whether the project config marks the build variant as an ASAN build.

        :param build_variant: Name of build variant to check.
        :return: True if build variant is an ASAN build variant.
        """
        return self.evg_project_config.get_variant(build_variant).is_asan_build()

    def determine_timeouts(self, cli_idle_timeout: Optional[timedelta],
                           cli_exec_timeout: Optional[timedelta], outfile: Optional[str], task: str,
                           variant: str, evg_alias: str, suite_name: str,
                           exec_timeout_factor: Optional[float]) -> None:
        """
        Determine the timeouts to use for the given task and write timeouts to expansion file.

        :param cli_idle_timeout: Idle timeout specified by the CLI.
        :param cli_exec_timeout: Exec timeout specified by the CLI.
        :param outfile: File to write timeout expansions to.
        :param task: Name of task being run.
        :param variant: Build variant task is being run on.
        :param evg_alias: Evergreen alias that triggered task.
        :param suite_name: Name of evergreen suite being run.
        :param exec_timeout_factor: Scaling factor to use when determining timeout.
        """
        idle_timeout = self.determine_idle_timeout(task, variant, cli_idle_timeout)
        exec_timeout = self.determine_exec_timeout(task, variant, idle_timeout, cli_exec_timeout,
                                                   evg_alias)

        # A historic estimate, when available, replaces the exec timeout chosen above.
        historic_timeout = self.determine_historic_timeout(task, variant, suite_name,
                                                           exec_timeout_factor)
        if historic_timeout:
            exec_timeout = historic_timeout

        output_timeout(exec_timeout, idle_timeout, outfile)
|
||||
|
||||
|
||||
def main():
|
||||
"""Determine the timeout value a task should use in evergreen."""
|
||||
parser = argparse.ArgumentParser(description=main.__doc__)
|
||||
|
||||
parser.add_argument("--task-name", dest="task", required=True, help="Task being executed.")
|
||||
parser.add_argument("--suite-name", dest="suite_name", required=True,
|
||||
help="Resmoke suite being run against.")
|
||||
parser.add_argument("--build-variant", dest="variant", required=True,
|
||||
help="Build variant task is being executed on.")
|
||||
parser.add_argument("--evg-alias", dest="evg_alias", required=True,
|
||||
help="Evergreen alias used to trigger build.")
|
||||
parser.add_argument("--timeout", dest="timeout", type=int, help="Timeout to use (in sec).")
|
||||
parser.add_argument("--exec-timeout", dest="exec_timeout", type=int,
|
||||
help="Exec timeout ot use (in sec).")
|
||||
help="Exec timeout to use (in sec).")
|
||||
parser.add_argument("--exec-timeout-factor", dest="exec_timeout_factor", type=float,
|
||||
help="Exec timeout factor to use (in sec).")
|
||||
parser.add_argument("--out-file", dest="outfile", help="File to write configuration to.")
|
||||
parser.add_argument("--timeout-overrides", dest="timeout_overrides_file",
|
||||
default=DEFAULT_TIMEOUT_OVERRIDES,
|
||||
help="File containing timeout overrides to use.")
|
||||
parser.add_argument("--evg-api-config", dest="evg_api_config",
|
||||
default=DEFAULT_EVERGREEN_AUTH_CONFIG, help="Evergreen API config file.")
|
||||
parser.add_argument("--evg-project-config", dest="evg_project_config",
|
||||
default=DEFAULT_EVERGREEN_CONFIG, help="Evergreen project config file.")
|
||||
|
||||
options = parser.parse_args()
|
||||
|
||||
end_date = datetime.now()
|
||||
start_date = end_date - HISTORY_LOOKBACK
|
||||
|
||||
timeout_override = timedelta(seconds=options.timeout) if options.timeout else None
|
||||
exec_timeout_override = timedelta(
|
||||
seconds=options.exec_timeout) if options.exec_timeout else None
|
||||
task_timeout = determine_timeout(options.task, options.variant, timeout_override,
|
||||
exec_timeout_override, options.evg_alias)
|
||||
output_timeout(task_timeout, options.outfile)
|
||||
|
||||
task_name = determine_task_base_name(options.task, options.variant)
|
||||
timeout_overrides = TimeoutOverrides.from_yaml_file(
|
||||
os.path.expanduser(options.timeout_overrides_file))
|
||||
|
||||
enable_logging(verbose=False)
|
||||
|
||||
def dependencies(binder: inject.Binder) -> None:
|
||||
binder.bind(
|
||||
EvergreenApi,
|
||||
RetryingEvergreenApi.get_api(config_file=os.path.expanduser(options.evg_api_config)))
|
||||
binder.bind(TimeoutSettings, TimeoutSettings(start_date=start_date, end_date=end_date))
|
||||
binder.bind(TimeoutOverrides, timeout_overrides)
|
||||
binder.bind(EvergreenProjectConfig,
|
||||
parse_evergreen_file(os.path.expanduser(options.evg_project_config)))
|
||||
|
||||
inject.configure(dependencies)
|
||||
|
||||
task_timeout_orchestrator = inject.instance(TaskTimeoutOrchestrator)
|
||||
task_timeout_orchestrator.determine_timeouts(
|
||||
timeout_override, exec_timeout_override, options.outfile, task_name, options.variant,
|
||||
options.evg_alias, options.suite_name, options.exec_timeout_factor)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ from evergreen import EvergreenApi
|
|||
|
||||
from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService
|
||||
from buildscripts.task_generation.suite_split_strategies import SplitStrategy, FallbackStrategy
|
||||
from buildscripts.task_generation.timeout import TimeoutEstimate
|
||||
from buildscripts.timeouts.timeout import TimeoutEstimate
|
||||
from buildscripts.util import taskname
|
||||
from buildscripts.util.teststats import HistoricTaskData, TestRuntime, normalize_test_name
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ from buildscripts.task_generation.task_types.gentask_options import GenTaskOptio
|
|||
from buildscripts.task_generation.task_types.models.resmoke_task_model import ResmokeTask
|
||||
from buildscripts.task_generation.task_types.multiversion_decorator import MultiversionGenTaskDecorator, \
|
||||
MultiversionDecoratorParams
|
||||
from buildscripts.task_generation.timeout import TimeoutEstimate
|
||||
from buildscripts.timeouts.timeout import TimeoutEstimate
|
||||
|
||||
LOGGER = structlog.getLogger(__name__)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,46 +1,213 @@
|
|||
"""Unit tests for the evergreen_task_timeout script."""
|
||||
from datetime import timedelta
|
||||
import unittest
|
||||
from datetime import timedelta
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import buildscripts.evergreen_task_timeout as under_test
|
||||
from buildscripts.ciconfig.evergreen import EvergreenProjectConfig
|
||||
from buildscripts.timeouts.timeout_service import TimeoutService
|
||||
|
||||
# pylint: disable=missing-docstring,no-self-use
|
||||
# pylint: disable=missing-docstring,no-self-use,invalid-name,protected-access
|
||||
|
||||
|
||||
class DetermineTimeoutTest(unittest.TestCase):
|
||||
class TestTimeoutOverride(unittest.TestCase):
    def test_exec_timeout_should_be_settable(self):
        """An exec timeout given in minutes is returned as the matching timedelta."""
        override = under_test.TimeoutOverride(task="my task", exec_timeout=42)

        exec_timeout = override.get_exec_timeout()

        self.assertIsNotNone(exec_timeout)
        self.assertEqual(42 * 60, exec_timeout.total_seconds())

    def test_exec_timeout_should_default_to_none(self):
        """Without an exec timeout the getter returns None."""
        override = under_test.TimeoutOverride(task="my task")

        self.assertIsNone(override.get_exec_timeout())

    def test_idle_timeout_should_be_settable(self):
        """An idle timeout given in minutes is returned as the matching timedelta."""
        override = under_test.TimeoutOverride(task="my task", idle_timeout=42)

        idle_timeout = override.get_idle_timeout()

        self.assertIsNotNone(idle_timeout)
        self.assertEqual(42 * 60, idle_timeout.total_seconds())

    def test_idle_timeout_should_default_to_none(self):
        """Without an idle timeout the getter returns None."""
        override = under_test.TimeoutOverride(task="my task")

        self.assertIsNone(override.get_idle_timeout())
|
||||
|
||||
|
||||
class TestTimeoutOverrides(unittest.TestCase):
    def test_looking_up_a_non_existing_override_should_return_none(self):
        """Lookups against an empty collection find nothing."""
        timeout_overrides = under_test.TimeoutOverrides(overrides={})

        self.assertIsNone(timeout_overrides.lookup_exec_override("bv", "task"))
        self.assertIsNone(timeout_overrides.lookup_idle_override("bv", "task"))

    def test_looking_up_a_duplicate_override_should_raise_error(self):
        """Two overrides for the same task on one build variant are rejected."""
        timeout_overrides = under_test.TimeoutOverrides(
            overrides={
                "bv": [{
                    "task": "task_name",
                    "exec_timeout": 42,
                    "idle_timeout": 10,
                }, {
                    "task": "task_name",
                    "exec_timeout": 314,
                    "idle_timeout": 20,
                }]
            })

        # The lookups are called directly: they raise before returning, so wrapping them in
        # another assertion would never execute and would only obscure what is being checked.
        with self.assertRaises(ValueError):
            timeout_overrides.lookup_exec_override("bv", "task_name")

        with self.assertRaises(ValueError):
            timeout_overrides.lookup_idle_override("bv", "task_name")

    def test_looking_up_an_exec_override_should_work(self):
        """The exec override of the matching task is returned, not that of a sibling task."""
        timeout_overrides = under_test.TimeoutOverrides(
            overrides={
                "bv": [
                    {
                        "task": "another_task",
                        "exec_timeout": 314,
                        "idle_timeout": 20,
                    },
                    {
                        "task": "task_name",
                        "exec_timeout": 42,
                    },
                ]
            })

        self.assertEqual(42 * 60,
                         timeout_overrides.lookup_exec_override("bv", "task_name").total_seconds())

    def test_looking_up_an_idle_override_should_work(self):
        """The idle override of the matching task is returned, not that of a sibling task."""
        timeout_overrides = under_test.TimeoutOverrides(
            overrides={
                "bv": [
                    {
                        "task": "another_task",
                        "exec_timeout": 314,
                        "idle_timeout": 20,
                    },
                    {
                        "task": "task_name",
                        "idle_timeout": 10,
                    },
                ]
            })

        self.assertEqual(10 * 60,
                         timeout_overrides.lookup_idle_override("bv", "task_name").total_seconds())
|
||||
|
||||
|
||||
class TestDetermineExecTimeout(unittest.TestCase):
|
||||
def test_timeout_used_if_specified(self):
|
||||
mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
|
||||
orchestrator = under_test.TaskTimeoutOrchestrator(
|
||||
timeout_service=MagicMock(spec_set=TimeoutService),
|
||||
timeout_overrides=mock_timeout_overrides,
|
||||
evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
|
||||
timeout = timedelta(seconds=42)
|
||||
self.assertEqual(
|
||||
under_test.determine_timeout("task_name", "variant", None, timeout), timeout)
|
||||
orchestrator.determine_exec_timeout("task_name", "variant", None, timeout), timeout)
|
||||
|
||||
def test_default_is_returned_with_no_timeout(self):
|
||||
mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
|
||||
orchestrator = under_test.TaskTimeoutOrchestrator(
|
||||
timeout_service=MagicMock(spec_set=TimeoutService),
|
||||
timeout_overrides=mock_timeout_overrides,
|
||||
evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
|
||||
self.assertEqual(
|
||||
under_test.determine_timeout("task_name", "variant"),
|
||||
orchestrator.determine_exec_timeout("task_name", "variant"),
|
||||
under_test.DEFAULT_NON_REQUIRED_BUILD_TIMEOUT)
|
||||
|
||||
def test_default_is_returned_with_timeout_at_zero(self):
|
||||
mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
|
||||
orchestrator = under_test.TaskTimeoutOrchestrator(
|
||||
timeout_service=MagicMock(spec_set=TimeoutService),
|
||||
timeout_overrides=mock_timeout_overrides,
|
||||
evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
|
||||
self.assertEqual(
|
||||
under_test.determine_timeout("task_name", "variant", timedelta(seconds=0)),
|
||||
orchestrator.determine_exec_timeout("task_name", "variant", timedelta(seconds=0)),
|
||||
under_test.DEFAULT_NON_REQUIRED_BUILD_TIMEOUT)
|
||||
|
||||
def test_default_required_returned_on_required_variants(self):
|
||||
mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
|
||||
orchestrator = under_test.TaskTimeoutOrchestrator(
|
||||
timeout_service=MagicMock(spec_set=TimeoutService),
|
||||
timeout_overrides=mock_timeout_overrides,
|
||||
evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
|
||||
self.assertEqual(
|
||||
under_test.determine_timeout("task_name", "variant-required"),
|
||||
orchestrator.determine_exec_timeout("task_name", "variant-required"),
|
||||
under_test.DEFAULT_REQUIRED_BUILD_TIMEOUT)
|
||||
|
||||
def test_task_specific_timeout(self):
|
||||
mock_timeout_overrides = under_test.TimeoutOverrides(
|
||||
overrides={"linux-64-debug": [{"task": "auth", "exec_timeout": 60}]})
|
||||
orchestrator = under_test.TaskTimeoutOrchestrator(
|
||||
timeout_service=MagicMock(spec_set=TimeoutService),
|
||||
timeout_overrides=mock_timeout_overrides,
|
||||
evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
|
||||
self.assertEqual(
|
||||
under_test.determine_timeout("auth", "linux-64-debug"), timedelta(minutes=60))
|
||||
orchestrator.determine_exec_timeout("auth", "linux-64-debug"), timedelta(minutes=60))
|
||||
|
||||
def test_commit_queue_items_use_commit_queue_timeout(self):
|
||||
timeout = under_test.determine_timeout("auth", "variant",
|
||||
evg_alias=under_test.COMMIT_QUEUE_ALIAS)
|
||||
mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
|
||||
orchestrator = under_test.TaskTimeoutOrchestrator(
|
||||
timeout_service=MagicMock(spec_set=TimeoutService),
|
||||
timeout_overrides=mock_timeout_overrides,
|
||||
evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
|
||||
timeout = orchestrator.determine_exec_timeout("auth", "variant",
|
||||
evg_alias=under_test.COMMIT_QUEUE_ALIAS)
|
||||
self.assertEqual(timeout, under_test.COMMIT_QUEUE_TIMEOUT)
|
||||
|
||||
def test_use_idle_timeout_if_greater_than_exec_timeout(self):
|
||||
mock_timeout_overrides = under_test.TimeoutOverrides(overrides={})
|
||||
orchestrator = under_test.TaskTimeoutOrchestrator(
|
||||
timeout_service=MagicMock(spec_set=TimeoutService),
|
||||
timeout_overrides=mock_timeout_overrides,
|
||||
evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))
|
||||
idle_timeout = timedelta(hours=2)
|
||||
exec_timeout = timedelta(minutes=10)
|
||||
timeout = under_test.determine_timeout("task_name", "variant", idle_timeout=idle_timeout,
|
||||
exec_timeout=exec_timeout)
|
||||
timeout = orchestrator.determine_exec_timeout(
|
||||
"task_name", "variant", idle_timeout=idle_timeout, exec_timeout=exec_timeout)
|
||||
|
||||
self.assertEqual(timeout, idle_timeout)
|
||||
|
||||
|
||||
class TestDetermineIdleTimeout(unittest.TestCase):
    @staticmethod
    def _orchestrator_with(overrides):
        """Build an orchestrator with mocked collaborators and the given override mapping."""
        mock_timeout_overrides = under_test.TimeoutOverrides(overrides=overrides)
        return under_test.TaskTimeoutOrchestrator(
            timeout_service=MagicMock(spec_set=TimeoutService),
            timeout_overrides=mock_timeout_overrides,
            evg_project_config=MagicMock(spec_set=EvergreenProjectConfig))

    def test_timeout_used_if_specified(self):
        """An explicitly supplied idle timeout is returned unchanged."""
        orchestrator = self._orchestrator_with({})
        requested = timedelta(seconds=42)

        determined = orchestrator.determine_idle_timeout("task_name", "variant", requested)

        self.assertEqual(determined, requested)

    def test_default_is_returned_with_no_timeout(self):
        """With neither a supplied timeout nor an override, no idle timeout is chosen."""
        orchestrator = self._orchestrator_with({})

        self.assertIsNone(orchestrator.determine_idle_timeout("task_name", "variant"))

    def test_task_specific_timeout(self):
        """A configured idle override is used when no timeout is supplied."""
        orchestrator = self._orchestrator_with(
            {"linux-64-debug": [{"task": "auth", "idle_timeout": 60}]})

        determined = orchestrator.determine_idle_timeout("auth", "linux-64-debug")

        self.assertEqual(determined, timedelta(minutes=60))
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
"""Empty."""
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
"""Unit tests for timeout.py."""
|
||||
import unittest
|
||||
|
||||
from buildscripts.task_generation import timeout as under_test
|
||||
from buildscripts.timeouts import timeout as under_test
|
||||
|
||||
# pylint: disable=missing-docstring,invalid-name,unused-argument,no-self-use,protected-access,no-value-for-parameter
|
||||
|
||||
|
|
@ -0,0 +1,258 @@
|
|||
"""Unit tests for timeout_service.py."""
|
||||
import random
|
||||
import unittest
|
||||
from datetime import datetime, timedelta
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from requests.exceptions import HTTPError
|
||||
from evergreen import EvergreenApi
|
||||
|
||||
import buildscripts.timeouts.timeout_service as under_test
|
||||
from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService
|
||||
from buildscripts.util.teststats import HistoricTaskData
|
||||
|
||||
# pylint: disable=missing-docstring,no-self-use,invalid-name,protected-access
|
||||
|
||||
|
||||
def build_mock_service(evg_api=None, resmoke_proxy=None):
    """
    Build a TimeoutService for testing.

    Any collaborator that is not supplied is replaced by a spec'd MagicMock. The service is
    configured with a history window covering the two weeks before now.
    """
    window_end = datetime.now()
    settings = under_test.TimeoutSettings(
        end_date=window_end,
        start_date=window_end - timedelta(weeks=2),
    )
    return under_test.TimeoutService(
        evg_api=evg_api or MagicMock(spec_set=EvergreenApi),
        resmoke_proxy=resmoke_proxy or MagicMock(spec_set=ResmokeProxyService),
        timeout_settings=settings)
|
||||
|
||||
|
||||
def tst_stat_mock(file, duration, pass_count):
    """Create a mock test-stats entry with the given test file, average pass duration and pass count."""
    attributes = {
        "test_file": file,
        "avg_duration_pass": duration,
        "num_pass": pass_count,
    }
    return MagicMock(**attributes)
|
||||
|
||||
|
||||
class TestGetTimeoutEstimate(unittest.TestCase):
    """Tests for TimeoutService.get_timeout_estimate."""

    @staticmethod
    def _timeout_params():
        """Build the task description used by every test in this class."""
        return under_test.TimeoutParams(
            evg_project="my project",
            build_variant="bv",
            task_name="my task",
            suite_name="my suite",
            is_asan=False,
        )

    def test_no_stats_should_return_default_timeout(self):
        evg_api = MagicMock(spec_set=EvergreenApi)
        evg_api.test_stats_by_project.return_value = []
        service = build_mock_service(evg_api=evg_api)

        estimate = service.get_timeout_estimate(self._timeout_params())

        self.assertFalse(estimate.is_specified())

    def test_a_test_with_missing_history_should_cause_a_default_timeout(self):
        evg_api = MagicMock(spec_set=EvergreenApi)
        evg_api.test_stats_by_project.return_value = [
            tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)
        ]
        resmoke_proxy = MagicMock(spec_set=ResmokeProxyService)
        # The suite contains a test that has no entry in the historic stats.
        resmoke_proxy.list_tests.return_value = ["test_with_no_stats.js"]
        service = build_mock_service(evg_api=evg_api, resmoke_proxy=resmoke_proxy)

        estimate = service.get_timeout_estimate(self._timeout_params())

        self.assertFalse(estimate.is_specified())

    def test_a_test_with_zero_runtime_history_should_cause_a_default_timeout(self):
        evg_api = MagicMock(spec_set=EvergreenApi)
        stats = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)]
        stats.append(tst_stat_mock("zero.js", 0.0, 1))
        evg_api.test_stats_by_project.return_value = stats
        resmoke_proxy = MagicMock(spec_set=ResmokeProxyService)
        resmoke_proxy.list_tests.return_value = [stat.test_file for stat in stats]
        service = build_mock_service(evg_api=evg_api, resmoke_proxy=resmoke_proxy)

        estimate = service.get_timeout_estimate(self._timeout_params())

        self.assertFalse(estimate.is_specified())

    def test_all_tests_with_runtime_history_should_use_custom_timeout(self):
        evg_api = MagicMock(spec_set=EvergreenApi)
        n_tests = 30
        test_runtime = 600
        stats = [tst_stat_mock(f"test_{i}.js", test_runtime, 1) for i in range(n_tests)]
        evg_api.test_stats_by_project.return_value = stats
        resmoke_proxy = MagicMock(spec_set=ResmokeProxyService)
        resmoke_proxy.list_tests.return_value = [stat.test_file for stat in stats]
        service = build_mock_service(evg_api=evg_api, resmoke_proxy=resmoke_proxy)

        estimate = service.get_timeout_estimate(self._timeout_params())

        self.assertTrue(estimate.is_specified())
        self.assertEqual(1860, estimate.calculate_test_timeout(1))
        self.assertEqual(54180, estimate.calculate_task_timeout(1))
|
||||
|
||||
|
||||
class TestGetTaskHookOverhead(unittest.TestCase):
    """Tests for TimeoutService.get_task_hook_overhead."""

    def test_no_stats_should_return_zero(self):
        service = build_mock_service()

        actual = service.get_task_hook_overhead("suite", is_asan=False, test_count=30,
                                                historic_stats=None)

        self.assertEqual(0.0, actual)

    def test_stats_with_no_clean_every_n_should_return_zero(self):
        service = build_mock_service()
        stat_list = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(30)]
        historic_stats = HistoricTaskData.from_stats_list(stat_list)

        actual = service.get_task_hook_overhead("suite", is_asan=False, test_count=30,
                                                historic_stats=historic_stats)

        self.assertEqual(0.0, actual)

    def test_stats_with_clean_every_n_should_return_overhead(self):
        test_count = 30
        runtime = 25
        service = build_mock_service()
        stat_list = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(test_count)]
        hook_stats = [
            tst_stat_mock(f"test_{i}:{under_test.CLEAN_EVERY_N_HOOK}", runtime, 1)
            for i in range(10)
        ]
        stat_list.extend(hook_stats)
        # Shuffle so the result cannot depend on the order of the stats.
        random.shuffle(stat_list)
        historic_stats = HistoricTaskData.from_stats_list(stat_list)

        actual = service.get_task_hook_overhead("suite", is_asan=True, test_count=test_count,
                                                historic_stats=historic_stats)

        self.assertEqual(runtime * test_count, actual)
|
||||
|
||||
|
||||
class TestLookupHistoricStats(unittest.TestCase):
    """Tests for TimeoutService.lookup_historic_stats."""

    @staticmethod
    def _timeout_params():
        """Build the task description used by every test in this class."""
        return under_test.TimeoutParams(
            evg_project="my project",
            build_variant="bv",
            task_name="my task",
            suite_name="my suite",
            is_asan=False,
        )

    def test_no_stats_from_evergreen_should_return_none(self):
        evg_api = MagicMock(spec_set=EvergreenApi)
        evg_api.test_stats_by_project.return_value = []
        service = build_mock_service(evg_api=evg_api)

        result = service.lookup_historic_stats(self._timeout_params())

        self.assertIsNone(result)

    def test_errors_from_evergreen_should_return_none(self):
        evg_api = MagicMock(spec_set=EvergreenApi)
        evg_api.test_stats_by_project.side_effect = HTTPError("failed to connect")
        service = build_mock_service(evg_api=evg_api)

        result = service.lookup_historic_stats(self._timeout_params())

        self.assertIsNone(result)

    def test_stats_from_evergreen_should_return_the_stats(self):
        evg_api = MagicMock(spec_set=EvergreenApi)
        expected_stats = [tst_stat_mock(f"test_{i}.js", 60, 1) for i in range(100)]
        evg_api.test_stats_by_project.return_value = expected_stats
        service = build_mock_service(evg_api=evg_api)

        result = service.lookup_historic_stats(self._timeout_params())

        self.assertIsNotNone(result)
        self.assertEqual(len(expected_stats), len(result.historic_test_results))
|
||||
|
||||
|
||||
class TestGetCleanEveryNCadence(unittest.TestCase):
    """Tests for TimeoutService._get_clean_every_n_cadence."""

    @staticmethod
    def _service_with_hooks(hooks):
        """Build a service whose resmoke proxy reports the given executor hooks for any suite."""
        resmoke_proxy = MagicMock()
        resmoke_proxy.read_suite_config.return_value = {"executor": {"hooks": hooks}}
        return build_mock_service(resmoke_proxy=resmoke_proxy)

    def test_clean_every_n_cadence_on_asan(self):
        service = build_mock_service()

        actual = service._get_clean_every_n_cadence("suite", True)

        self.assertEqual(1, actual)

    def test_clean_every_n_cadence_from_hook_config(self):
        expected_n = 42
        service = self._service_with_hooks([
            {"class": "hook1"},
            {"class": under_test.CLEAN_EVERY_N_HOOK, "n": expected_n},
        ])

        actual = service._get_clean_every_n_cadence("suite", False)

        self.assertEqual(expected_n, actual)

    def test_clean_every_n_cadence_no_n_in_hook_config(self):
        service = self._service_with_hooks([
            {"class": "hook1"},
            {"class": under_test.CLEAN_EVERY_N_HOOK},
        ])

        actual = service._get_clean_every_n_cadence("suite", False)

        self.assertEqual(1, actual)

    def test_clean_every_n_cadence_no_hook_config(self):
        service = self._service_with_hooks([{"class": "hook1"}])

        actual = service._get_clean_every_n_cadence("suite", False)

        self.assertEqual(1, actual)
|
||||
|
|
@ -4,7 +4,7 @@ import unittest
|
|||
|
||||
import buildscripts.util.taskname as under_test
|
||||
|
||||
# pylint: disable=missing-docstring,protected-access
|
||||
# pylint: disable=missing-docstring,protected-access,invalid-name
|
||||
|
||||
|
||||
class TestNameTask(unittest.TestCase):
|
||||
|
|
@ -24,3 +24,29 @@ class TestRemoveGenSuffix(unittest.TestCase):
|
|||
input_task_name = "sharded_multi_stmt_txn_jscore_passthroug"
|
||||
self.assertEqual("sharded_multi_stmt_txn_jscore_passthroug",
|
||||
under_test.remove_gen_suffix(input_task_name))
|
||||
|
||||
|
||||
class TestDetermineTaskBaseName(unittest.TestCase):
    """Tests for determine_task_base_name."""

    BUILD_VARIANT = "enterprise-rhel-80-64-bit-dynamic-required"

    def _base_name_of(self, task_name):
        """Resolve the base name of the given task against the shared build variant."""
        return under_test.determine_task_base_name(task_name, self.BUILD_VARIANT)

    def test_task_name_with_build_variant_should_strip_bv_and_sub_task_index(self):
        task_name = f"auth_23_{self.BUILD_VARIANT}"

        self.assertEqual("auth", self._base_name_of(task_name))

    def test_task_name_without_build_variant_should_strip_sub_task_index(self):
        self.assertEqual("auth", self._base_name_of("auth_314"))

    def test_task_name_without_build_variant_or_subtask_index_should_self(self):
        self.assertEqual("auth", self._base_name_of("auth"))
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
"""Empty."""
|
||||
|
|
@ -1,11 +1,10 @@
|
|||
"""Timeout information for generating tasks."""
|
||||
import math
|
||||
from datetime import timedelta
|
||||
from inspect import getframeinfo, currentframe
|
||||
from inspect import currentframe, getframeinfo
|
||||
from typing import NamedTuple, Optional
|
||||
|
||||
import structlog
|
||||
|
||||
from buildscripts.patch_builds.task_generation import TimeoutInfo
|
||||
|
||||
LOGGER = structlog.getLogger(__name__)
|
||||
|
|
@ -0,0 +1,189 @@
|
|||
"""Service for determining task timeouts."""
|
||||
from datetime import datetime
|
||||
from typing import Any, Dict, NamedTuple, Optional
|
||||
|
||||
import inject
|
||||
import structlog
|
||||
from buildscripts.task_generation.resmoke_proxy import ResmokeProxyService
|
||||
from buildscripts.timeouts.timeout import TimeoutEstimate
|
||||
from buildscripts.util.teststats import HistoricTaskData
|
||||
from evergreen import EvergreenApi
|
||||
|
||||
LOGGER = structlog.get_logger(__name__)
|
||||
CLEAN_EVERY_N_HOOK = "CleanEveryN"
|
||||
|
||||
|
||||
class TimeoutParams(NamedTuple):
    """
    Description of the task whose timeout is being determined.

    Fields:
      evg_project -- Evergreen project the task belongs to.
      build_variant -- Evergreen build variant the task runs on.
      task_name -- Evergreen name of the task.
      suite_name -- Test suite the task runs.
      is_asan -- True when the run is part of an ASAN build.
    """

    evg_project: str
    build_variant: str
    task_name: str
    suite_name: str
    is_asan: bool
|
||||
|
||||
|
||||
class TimeoutSettings(NamedTuple):
    """
    Settings that control how timeouts are determined.

    Fields:
      start_date -- Start of the date range of historic results to query.
      end_date -- End of the date range of historic results to query.
    """

    start_date: datetime
    end_date: datetime
|
||||
|
||||
|
||||
class TimeoutService:
    """Service that derives task timeouts from historic test results."""

    @inject.autoparams()
    def __init__(self, evg_api: EvergreenApi, resmoke_proxy: ResmokeProxyService,
                 timeout_settings: TimeoutSettings) -> None:
        """
        Create the service.

        :param evg_api: Evergreen API client.
        :param resmoke_proxy: Proxy used to query resmoke.
        :param timeout_settings: Settings for how timeouts are calculated.
        """
        self.evg_api = evg_api
        self.resmoke_proxy = resmoke_proxy
        self.timeout_settings = timeout_settings

    def get_timeout_estimate(self, timeout_params: TimeoutParams) -> TimeoutEstimate:
        """
        Estimate the timeouts for a task from its historic test results.

        An unspecified estimate is returned when no history is available, when any test of
        the suite lacks history, or when any test has a recorded runtime that is not positive.

        :param timeout_params: Details about the task to query.
        :return: Timeouts to use based on historic test results.
        """
        historic_stats = self.lookup_historic_stats(timeout_params)
        if not historic_stats:
            return TimeoutEstimate.no_timeouts()

        suite_tests = set(self.resmoke_proxy.list_tests(timeout_params.suite_name))
        known_runtimes = [
            entry for entry in historic_stats.get_tests_runtimes()
            if entry.test_name in suite_tests
        ]
        tests_with_history = {entry.test_name for entry in known_runtimes}
        for test_name in suite_tests:
            if test_name in tests_with_history:
                continue
            # Without historic runtime information for every test we cannot reliably
            # determine a timeout, so fall back to a default timeout.
            LOGGER.warning(
                "Could not find historic runtime information for test, using default timeout",
                test=test_name)
            return TimeoutEstimate.no_timeouts()

        total_runtime = 0.0
        max_runtime = 0.0
        for entry in known_runtimes:
            if not (entry.runtime > 0.0):
                # A runtime of 0 indicates that the test does not have a proper runtime
                # history, so fall back to a default timeout.
                LOGGER.warning("Found a test with 0 runtime, using default timeouts",
                               test=entry.test_name)
                return TimeoutEstimate.no_timeouts()
            total_runtime += entry.runtime
            max_runtime = max(max_runtime, entry.runtime)

        total_runtime += self.get_task_hook_overhead(
            timeout_params.suite_name, timeout_params.is_asan, len(suite_tests), historic_stats)

        return TimeoutEstimate(max_test_runtime=max_runtime, expected_task_runtime=total_runtime)

    def get_task_hook_overhead(self, suite_name: str, is_asan: bool, test_count: int,
                               historic_stats: Optional[HistoricTaskData]) -> float:
        """
        Calculate the runtime overhead that task-level hooks add to a suite.

        The CleanEveryN hook runs once every 'N' tests and its runtime is attributed to
        whichever test happens to be running, which can differ between runs. Its cost is
        therefore accounted for at the task level rather than per test.

        :param suite_name: Name of suite being generated.
        :param is_asan: Whether ASAN is being used.
        :param test_count: Number of tests in sub-suite.
        :param historic_stats: Historic runtime data of the suite.
        :return: Expected total hook runtime, or 0.0 when there is no history for the hook.
        """
        if historic_stats is None:
            return 0.0

        cadence = self._get_clean_every_n_cadence(suite_name, is_asan)
        avg_hook_runtime = historic_stats.get_avg_hook_runtime(CLEAN_EVERY_N_HOOK)
        LOGGER.debug("task hook overhead", cadence=cadence, runtime=avg_hook_runtime,
                     is_asan=is_asan)
        if avg_hook_runtime == 0:
            return 0.0

        # Number of times the hook is expected to run over the whole suite.
        expected_runs = test_count / cadence
        return expected_runs * avg_hook_runtime

    def lookup_historic_stats(self, timeout_params: TimeoutParams) -> Optional[HistoricTaskData]:
        """
        Look up the historic test result stats for the given task.

        :param timeout_params: Details about the task to lookup.
        :return: Historic test results if they exist, otherwise None.
        """
        try:
            evg_stats = HistoricTaskData.from_evg(
                self.evg_api, timeout_params.evg_project, self.timeout_settings.start_date,
                self.timeout_settings.end_date, timeout_params.task_name,
                timeout_params.build_variant)
            if evg_stats:
                return evg_stats
            LOGGER.warning("No historic runtime information available")
        except Exception:  # pylint: disable=broad-except
            # Any trouble getting the historic runtime information is logged and reported as
            # "no history" so that callers fall back to default timeouts instead of failing.
            LOGGER.warning("Error querying history runtime information from evergreen",
                           exc_info=True)
        return None

    def _get_clean_every_n_cadence(self, suite_name: str, is_asan: bool) -> int:
        """
        Get the N value for the CleanEveryN hook.

        :param suite_name: Name of suite being generated.
        :param is_asan: Whether ASAN is being used.
        :return: How frequently the CleanEveryN hook is run.
        """
        # 1 is the worst case: CleanEveryN would run after every test.
        worst_case_cadence = 1
        if is_asan:
            # ASAN runs hard-code N to 1. See `resmokelib/testing/hooks/cleanup.py`.
            return worst_case_cadence

        hook_config = self._get_hook_config(suite_name, CLEAN_EVERY_N_HOOK)
        if not hook_config:
            return worst_case_cadence
        return hook_config.get("n", worst_case_cadence)

    def _get_hook_config(self, suite_name: str, hook_name: str) -> Optional[Dict[str, Any]]:
        """
        Get the configuration of the given hook in the given suite.

        :param suite_name: Name of the suite whose configuration is read.
        :param hook_name: Name of hook to query.
        :return: Configuration for hook, if it exists.
        """
        suite_config = self.resmoke_proxy.read_suite_config(suite_name)
        hooks = suite_config.get("executor", {}).get("hooks")
        if not hooks:
            return None
        # The first hook whose class matches wins.
        return next((hook for hook in hooks if hook.get("class") == hook_name), None)
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
"""Functions for working with resmoke task names."""
|
||||
|
||||
import math
|
||||
import re
|
||||
|
||||
GEN_SUFFIX = "_gen"
|
||||
|
||||
|
|
@ -36,3 +37,25 @@ def remove_gen_suffix(task_name: str) -> str:
|
|||
if task_name.endswith(GEN_SUFFIX):
|
||||
return task_name[:-4]
|
||||
return task_name
|
||||
|
||||
|
||||
def determine_task_base_name(task_name: str, build_variant: str) -> str:
    """
    Determine the base name of a task.

    For generated tasks the base name will have the build variant and sub-task index
    stripped off. For other tasks, it is the unmodified task_name.

    :param task_name: Name of task to get base name of.
    :param build_variant: Build variant that may be included in task name.
    :return: Base name of given task.
    """
    # The build variant is literal text, so it is escaped before being placed in the pattern;
    # otherwise characters such as '.' or '+' would be treated as regex syntax. The whole
    # name must match so that an index in the middle of a name is not mistaken for a
    # sub-task suffix.
    match = re.fullmatch(f"(.*)_([0-9]+|misc)_{re.escape(build_variant)}", task_name)
    if match:
        return match.group(1)

    # Generated sub-task without the build variant: "<base>_<index>" or "<base>_misc".
    match = re.fullmatch(r"(.*)_([0-9]+|misc)", task_name)
    if match:
        return match.group(1)

    return task_name
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
# Testing in Evergreen
|
||||
|
||||
Documentation about how MongoDB is tested in Evergreen.
|
||||
|
||||
* [Task Timeouts](task_timeouts.md)
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
# Evergreen Task Timeouts
|
||||
|
||||
## Types of timeouts
|
||||
|
||||
There are two types of timeouts that [evergreen supports](https://github.com/evergreen-ci/evergreen/wiki/Project-Commands#timeoutupdate):
|
||||
|
||||
* **Exec timeout**: The _exec_ timeout is the overall timeout for a task. Once the total runtime for
|
||||
a task hits this value, the timeout logic will be triggered. This value is specified by
|
||||
**exec_timeout_secs** in the evergreen configuration.
|
||||
* **Idle timeout**: The _idle_ timeout is the amount of time in which evergreen will wait for
|
||||
output to be created before it considers the task hung and triggers timeout logic. This value
|
||||
is specified by **timeout_secs** in the evergreen configuration.
|
||||
|
||||
**Note**: In most cases, **exec_timeout** is the more useful of the two timeouts.
|
||||
|
||||
## Setting the timeout for a task
|
||||
|
||||
There are a few ways in which the timeout can be determined for a task running in evergreen.
|
||||
|
||||
* **Specified in 'etc/evergreen.yml'**: Timeout can be specified directly in the 'evergreen.yml' file,
|
||||
both on tasks and build variants. This can be useful for setting default timeout values, but is limited
|
||||
since different build variants frequently have different runtime characteristics and it is not possible
|
||||
to set timeouts for a task running on a specific build variant.
|
||||
|
||||
* **etc/evergreen_timeouts.yml**: The 'etc/evergreen_timeouts.yml' file is used for overriding timeouts
|
||||
for specific tasks on specific build variants. This provides a work-around for the limitations of
|
||||
specifying the timeouts directly in the 'evergreen.yml'. In order to use this method, the task
|
||||
must run the "determine task timeout" and "update task timeout expansions" functions at the beginning
|
||||
of the task evergreen definition. Most resmoke tasks already do this.
|
||||
|
||||
* **buildscripts/evergreen_task_timeout.py**: This is the script that reads the 'etc/evergreen_timeouts.yml'
|
||||
file and calculates the timeout to use. Additionally, it will check the historic test results of the
|
||||
task being run and see if there is enough information to calculate timeouts based on that. It can
|
||||
also be used for more advanced ways of determining timeouts (e.g. the script is used to set much
|
||||
more aggressive timeouts on tasks that are run in the commit-queue).
|
||||
|
|
@ -1218,6 +1218,9 @@ functions:
|
|||
- *update_resmoke_jobs_expansions
|
||||
- *f_expansions_write
|
||||
- *configure_evergreen_api_credentials
|
||||
- *determine_task_timeout
|
||||
- *update_task_timeout_expansions
|
||||
- *f_expansions_write
|
||||
- command: subprocess.exec
|
||||
params:
|
||||
binary: bash
|
||||
|
|
@ -1243,6 +1246,7 @@ functions:
|
|||
|
||||
"run tests":
|
||||
- *f_expansions_write
|
||||
- *configure_evergreen_api_credentials
|
||||
- *determine_task_timeout
|
||||
- *update_task_timeout_expansions
|
||||
- *f_expansions_write
|
||||
|
|
@ -2246,7 +2250,9 @@ tasks:
|
|||
- "./build/**.gcno"
|
||||
- "./etc/*san.suppressions"
|
||||
- "./etc/backports_required_for_multiversion_tests.yml"
|
||||
- "./etc/evergreen_timeouts.yml"
|
||||
- "./etc/expansions.default.yml"
|
||||
- "./etc/evergreen.yml"
|
||||
- "./etc/pip/**"
|
||||
- "./etc/repo_config.yaml"
|
||||
- "./etc/scons/**"
|
||||
|
|
|
|||
|
|
@ -0,0 +1,93 @@
|
|||
# This file defines timeouts in evergreen that will override the default timeouts.
|
||||
#
|
||||
# Each key under `overrides` provides the build variant where the override will occur. The
|
||||
# override should include the `task` that should have its timeout overridden and either the
|
||||
# `exec_timeout` to override or the `idle_timeout` to override.
|
||||
#
|
||||
# The timeouts should be specified in minutes.
|
||||
|
||||
# Note: In order to make it easier to find existing entries, please try to keep the build variants
|
||||
# in alphabetical order.
|
||||
|
||||
overrides:
|
||||
enterprise-macos:
|
||||
- task: replica_sets_jscore_passthrough
|
||||
exec_timeout: 150 # 2.5 hours
|
||||
|
||||
enterprise-rhel-80-64-bit-coverage:
|
||||
- task: replica_sets_jscore_passthrough
|
||||
exec_timeout: 150 # 2.5 hours.
|
||||
|
||||
enterprise-ubuntu2004-debug-tsan:
|
||||
- task: run_unittests
|
||||
exec_timeout: 24
|
||||
|
||||
enterprise-windows:
|
||||
- task: replica_sets_jscore_passthrough
|
||||
exec_timeout: 180 # 3 hours.
|
||||
|
||||
enterprise-windows-all-feature-flags-suggested:
|
||||
- task: replica_sets_jscore_passthrough
|
||||
exec_timeout: 180 # 3 hours.
|
||||
- task: replica_sets_update_v1_oplog_jscore_passthrough
|
||||
exec_timeout: 150 # 2.5 hours.
|
||||
|
||||
enterprise-windows-inmem:
|
||||
- task: replica_sets_jscore_passthrough
|
||||
exec_timeout: 180 # 3 hours.
|
||||
|
||||
enterprise-windows-required:
|
||||
- task: replica_sets_jscore_passthrough
|
||||
exec_timeout: 180 # 3 hours.
|
||||
- task: replica_sets_update_v1_oplog_jscore_passthrough
|
||||
exec_timeout: 150 # 2.5 hours.
|
||||
|
||||
linux-64-debug:
|
||||
- task: auth
|
||||
exec_timeout: 60 # 1 hour.
|
||||
|
||||
linux-64-debug-repeated-execution:
|
||||
- task: run_unittests
|
||||
exec_timeout: 120 # 2 hours.
|
||||
|
||||
macos:
|
||||
- task: replica_sets_jscore_passthrough
|
||||
exec_timeout: 150 # 2.5 hours
|
||||
|
||||
ubuntu1804-asan:
|
||||
- task: run_unittests
|
||||
exec_timeout: 24
|
||||
|
||||
ubuntu1804-debug-asan:
|
||||
- task: run_unittests
|
||||
exec_timeout: 24
|
||||
|
||||
ubuntu1804-debug-aubsan-lite:
|
||||
- task: run_unittests
|
||||
exec_timeout: 24
|
||||
|
||||
ubuntu1804-debug-ubsan:
|
||||
- task: run_unittests
|
||||
exec_timeout: 24
|
||||
|
||||
ubuntu1804-debug-suggested:
|
||||
- task: replica_sets_jscore_passthrough
|
||||
exec_timeout: 180 # 3 hours.
|
||||
|
||||
ubuntu1804-ubsan:
|
||||
- task: run_unittests
|
||||
exec_timeout: 24
|
||||
|
||||
windows:
|
||||
- task: replica_sets
|
||||
exec_timeout: 180 # 3 hours.
|
||||
- task: replica_sets_jscore_passthrough
|
||||
exec_timeout: 150 # 2.5 hours.
|
||||
|
||||
windows-debug-suggested:
|
||||
- task: replica_sets_initsync_jscore_passthrough
|
||||
exec_timeout: 150 # 2.5 hours.
|
||||
- task: replica_sets_jscore_passthrough
|
||||
exec_timeout: 180 # 3 hours.
|
||||
- task: replica_sets_update_v1_oplog_jscore_passthrough
|
||||
exec_timeout: 150 # 2.5 hours.
|
||||
|
|
@ -5,11 +5,25 @@ cd src
|
|||
|
||||
set -o verbose
|
||||
set -o errexit
|
||||
|
||||
# Set the suite name to be the task name by default; unless overridden with the `suite` expansion.
|
||||
suite_name=${task_name}
|
||||
if [[ -n ${suite} ]]; then
|
||||
suite_name=${suite}
|
||||
fi
|
||||
|
||||
timeout_factor=""
|
||||
if [[ -n "${exec_timeout_factor}" ]]; then
|
||||
timeout_factor="--exec-timeout-factor ${exec_timeout_factor}"
|
||||
fi
|
||||
|
||||
activate_venv
|
||||
$python buildscripts/evergreen_task_timeout.py \
|
||||
PATH=$PATH:$HOME:/ $python buildscripts/evergreen_task_timeout.py $timeout_factor \
|
||||
--task-name ${task_name} \
|
||||
--suite-name ${suite_name} \
|
||||
--build-variant ${build_variant} \
|
||||
--evg-alias '${alias}' \
|
||||
--timeout ${timeout_secs} \
|
||||
--exec-timeout ${exec_timeout_secs} \
|
||||
--evg-api-config ./.evergreen.yml \
|
||||
--out-file task_timeout_expansions.yml
|
||||
|
|
|
|||
Loading…
Reference in New Issue